-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract.js
175 lines (147 loc) · 5.02 KB
/
extract.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
document.addEventListener("DOMContentLoaded", () => {
// ---- Fetch controls ----
const url = document.getElementById("url");
const btn = document.getElementById("submit_btn");
// Most recent URL submitted through the fetch button.
let url_link_mem = '';

// ---- Results table ----
const table = document.getElementById("data");
// `let` on purpose: the "back" handler re-queries this element after it
// rewrites the table markup.
let tbody = document.getElementById("tbody");
const clear_btn = document.getElementById("clear");
const save = document.getElementById("save");
// Markup of the table body captured by the save button.
let table_mem = '';

// ---- Filter (keep matching rows) ----
const filter = document.getElementById("filter");
const filter_btn = document.getElementById("filter_btn");
const unfilter_btn = document.getElementById("unfilter_btn");

// ---- Exclude (drop matching rows) ----
const exc = document.getElementById("exc");
const exc_btn = document.getElementById("exc_btn");
const exc_unfil_btn = document.getElementById("exc_unfil_btn");

// ---- Text view ----
const text_btn = document.getElementById("textify");
const back_btn = document.getElementById("back");
const content_cont = document.getElementById("content");
// Markup of the whole table captured when switching to the text view.
let filtered_mem = '';
// Exclude button: read the criterion, reset the input box, then drop every
// row whose type matches it.
exc_btn.addEventListener("click", () => {
  const criterion = exc.value;
  exc.value = '';
  exclude(criterion);
});
/**
 * Remove every table-body row whose type cell contains `crit`
 * (case-insensitive substring match), then renumber the remaining rows.
 *
 * An empty or missing criterion is a no-op. Every string contains the empty
 * string, so without this guard an empty input box would delete all rows.
 *
 * @param {string} crit - Text to look for in each row's "#type" cell.
 * @returns {void}
 */
function exclude(crit) {
  if (!crit) {
    return;
  }

  // Lower-case the criterion once instead of once per row.
  const needle = crit.toLowerCase();

  // Collect first and remove afterwards so the row list is not mutated
  // while it is being walked.
  const rowsToRemove = [];
  for (const row of tbody.querySelectorAll("tr")) {
    const type = row.querySelector("#type");
    // Rows without a type cell are left alone.
    if (type && type.textContent.toLowerCase().includes(needle)) {
      rowsToRemove.push(row);
    }
  }

  rowsToRemove.forEach((row) => tbody.removeChild(row));
  updateSn();
}
// Undo-exclude button: put back the rows captured by the save button.
exc_unfil_btn.addEventListener("click", () => {
  // With no snapshot yet, `table_mem` is still the empty string; assigning
  // it would silently erase every row, so leave the table as it is.
  if (table_mem === '') {
    return;
  }
  tbody.innerHTML = table_mem;
});
// Fetch button: take the URL from the input, reset the input, remember the
// URL, and start scraping it.
btn.addEventListener("click", () => {
  const requested = url.value;
  url.value = "";
  url_link_mem = requested;
  fetchHTML(requested);
});

// Clear button: drop every row from the table body.
clear_btn.addEventListener("click", () => clearTable());

// Save button: snapshot the current table-body markup so the restore
// buttons can bring it back later.
save.addEventListener("click", () => {
  table_mem = tbody.innerHTML;
});
// Textify button: switch to the text view by copying every row's title cell
// into a <p> inside the content container, then stash and empty the table.
text_btn.addEventListener("click", () => {
  // Emptying the table below detaches `tbody` from the page (assuming
  // #tbody lives inside #data), yet the variable still holds its rows.
  // A second click would therefore append every title again and overwrite
  // `filtered_mem` with empty markup, losing the table for good.
  if (!tbody.isConnected) {
    return;
  }

  for (const row of tbody.querySelectorAll("tr")) {
    const title_cell = row.querySelector("#title");
    if (!title_cell) {
      // No title cell means nothing to show for this row.
      continue;
    }
    const line = document.createElement("p");
    line.innerHTML = title_cell.innerHTML;
    content_cont.appendChild(line);
  }

  filtered_mem = table.innerHTML;
  table.innerHTML = '';
});
// Back button: leave the text view by restoring the stashed table markup
// and removing the generated paragraphs.
back_btn.addEventListener("click", () => {
  // Only restore when a snapshot is pending. Without this check, pressing
  // "back" before "textify" would assign an empty string and wipe the table,
  // and pressing it twice would roll the table back to a stale snapshot.
  if (filtered_mem !== '') {
    table.innerHTML = filtered_mem;
    filtered_mem = '';
    // Assigning innerHTML builds brand-new elements, so the old `tbody`
    // reference no longer points into the page.
    tbody = document.getElementById("tbody");
  }

  // `remove()` works for any descendant paragraph; `removeChild` on the
  // container would throw for one that is not a direct child.
  for (const p of content_cont.querySelectorAll("p")) {
    p.remove();
  }
});
/**
 * Remove every row from the table body by blanking its markup.
 *
 * @returns {void}
 */
function clearTable() {
  const emptyMarkup = '';
  tbody.innerHTML = emptyMarkup;
}
// Filter button: read the criterion, reset the input box, then keep only
// the rows whose type matches it.
filter_btn.addEventListener("click", () => {
  const criterion = filter.value;
  filter.value = '';
  filterBy(criterion);
});
// Undo-filter button: put back the rows captured by the save button.
unfilter_btn.addEventListener("click", () => {
  // With no snapshot yet, `table_mem` is still the empty string; assigning
  // it would silently erase every row, so leave the table as it is.
  if (table_mem === '') {
    return;
  }
  tbody.innerHTML = table_mem;
});
/**
 * Keep only the table-body rows whose type cell contains `crit`
 * (case-insensitive substring match) and renumber what is left.
 * Rows that have no "#type" cell are never removed.
 *
 * @param {string} crit - Text a row's type must contain to survive.
 * @returns {void}
 */
function filterBy(crit) {
  const isMismatch = (tr) => {
    const typeCell = tr.querySelector("#type");
    if (!typeCell) {
      return false;
    }
    return !typeCell.textContent.toLowerCase().includes(crit.toLowerCase());
  };

  // Decide on every row first, then detach the rejected ones.
  const rejected = Array.from(tbody.querySelectorAll("tr")).filter(isMismatch);
  for (const tr of rejected) {
    tbody.removeChild(tr);
  }

  updateSn();
}
/**
 * Rewrite the serial-number column: every table row that has an "#sn" cell
 * gets the next number, counting from 1 in document order. Rows without
 * such a cell are skipped and do not consume a number.
 *
 * @returns {void}
 */
function updateSn() {
  let lastSerial = 0;
  Array.from(table.querySelectorAll("tr"))
    .map((tr) => tr.querySelector("#sn"))
    .filter((cell) => Boolean(cell))
    .forEach((cell) => {
      lastSerial += 1;
      cell.textContent = String(lastSerial);
    });
}
/**
 * Download a page, build one table row per <article> found in it, and append
 * those rows to the table body. Each row holds, in order: serial number,
 * title plus author, link, type, and date.
 *
 * Failures (network error, non-2xx status, parsing problems) are logged to
 * the console and never propagate to the caller.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<void>} Settles once all rows have been appended.
 */
async function fetchHTML(url) {
  // A missing node yields an empty string instead of a TypeError, so one
  // incomplete article cannot abort the rest of the page.
  const textOf = (node) => (node ? node.textContent : '');

  try {
    const response = await fetch(url);
    if (!response.ok) {
      // Do not scrape an error page as if it were the requested listing.
      throw new Error(`Request for ${url} failed with HTTP status ${response.status}`);
    }
    const htmlString = await response.text();
    const doc = new DOMParser().parseFromString(htmlString, 'text/html');

    // A DOMParser document takes the URL of the current page, so relative
    // links must be resolved against the address that was actually fetched.
    const baseUrl = response.url || url;

    for (const art of doc.querySelectorAll('article')) {
      const h5 = art.querySelector("div > div > h5 > a");
      if (!h5) {
        // Without the title link there is neither a title nor a URL to show.
        continue;
      }
      const type_text = textOf(art.querySelector("article > div:nth-of-type(3) > div:nth-of-type(1)"));
      const date_text = textOf(art.querySelector("article > div:nth-of-type(3) > div:nth-of-type(2)"));
      const author_text = textOf(art.querySelector("article > div:nth-of-type(2)"));
      const title_text = h5.textContent;

      let link_text = '';
      const rawHref = h5.getAttribute("href");
      if (rawHref !== null) {
        try {
          link_text = new URL(rawHref, baseUrl).href;
        } catch (resolveError) {
          // Unresolvable base: fall back to the browser's own resolution.
          link_text = h5.href;
        }
      }

      // NOTE: the ids below repeat on every row. The filter, exclude,
      // text-view and renumbering code look cells up by these ids within a
      // row, so they are kept as they are.
      const sn = document.createElement("td");
      sn.id = "sn";

      // The title cell is assembled from nodes rather than an HTML string,
      // so scraped text can never be interpreted as markup.
      const title = document.createElement("td");
      title.id = "title";
      const author = document.createElement("b");
      author.textContent = author_text;
      title.appendChild(document.createTextNode(title_text));
      title.appendChild(document.createElement("br"));
      title.appendChild(author);

      const link = document.createElement("td");
      link.textContent = link_text;

      const type = document.createElement("td");
      type.id = "type";
      type.textContent = type_text;

      const date = document.createElement("td");
      date.textContent = date_text;

      const tr = document.createElement("tr");
      [sn, title, link, type, date].forEach((cell) => tr.appendChild(cell));
      tbody.appendChild(tr);
    }

    // Number the rows once, after all of them are in place, instead of
    // renumbering the whole table after every single row.
    updateSn();
  } catch (error) {
    console.error('Error fetching the HTML:', error);
  }
}
});