main.js
const puppeteer = require('puppeteer');
const fs = require('fs').promises;
// Base URL of the proxy listing to scrape.
const baseUrl = 'https://pt.proxyservers.pro/proxy/';
// Number of listing pages to walk through.
const maxPages = 29;
// Scrape a single listing page and return its proxies as "ip:port" strings.
async function scrapePage(pageNumber, browser) {
  const url = `${baseUrl}list/order/updated/order_dir/desc/page/${pageNumber}`;
  const page = await browser.newPage();
  const proxies = [];
  try {
    await page.goto(url, { waitUntil: 'networkidle2' });
    // Collect every <a title="..."> element (the title holds the IP) and the
    // matching <span.port> in the adjacent cell, once the page has loaded.
    const elements = await page.$$eval('a[title]', links => {
      return links.map(link => {
        const ip = link.getAttribute('title');
        // Optional chaining guards against rows without a sibling cell.
        const portElement = link.parentElement.nextElementSibling?.querySelector('span.port');
        const port = portElement ? portElement.textContent.trim() : null;
        return port ? `${ip}:${port}` : null;
      }).filter(proxy => proxy !== null);
    });
    proxies.push(...elements);
    console.log(`Page ${pageNumber} proxies:`, proxies);
  } catch (error) {
    console.error(`Error scraping page ${pageNumber}:`, error);
  } finally {
    await page.close();
  }
  return proxies;
}
// Scrape every listing page, de-duplicate the results, and save them to disk.
async function scrapeAllPages() {
  const browser = await puppeteer.launch();
  let allProxies = [];
  for (let page = 1; page <= maxPages; page++) {
    const proxies = await scrapePage(page, browser);
    allProxies = allProxies.concat(proxies);
    console.log(`Progress: ${page}/${maxPages} pages scraped.`);
  }
  // Remove duplicates while preserving insertion order.
  allProxies = [...new Set(allProxies)];
  console.log('All collected proxies:', allProxies);
  await browser.close();
  if (allProxies.length > 0) {
    await saveProxiesToFile(allProxies);
  } else {
    console.log('No proxies collected.');
  }
}
// Write the collected proxies to proxy_list.txt, one per line.
async function saveProxiesToFile(proxies) {
  try {
    await fs.writeFile('proxy_list.txt', proxies.join('\n'));
    console.log('Proxies saved to proxy_list.txt.');
  } catch (error) {
    console.error('Error saving proxies to file:', error);
  }
}
// Start scraping; catch any error that escapes the run (e.g. a failed browser launch).
scrapeAllPages().catch(error => console.error('Unexpected error:', error));
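
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the original script: one way the generated
// proxy_list.txt could be read back and split into { host, port } pairs. The
// file name and the "ip:port" line format match what saveProxiesToFile()
// writes above, and it reuses the fs.promises import from the top of the
// file; the loadProxies() helper itself is a hypothetical example.
async function loadProxies(path = 'proxy_list.txt') {
  const data = await fs.readFile(path, 'utf8');
  return data
    .split('\n')
    .map(line => line.trim())
    .filter(line => line.includes(':'))
    .map(line => {
      const [host, port] = line.split(':');
      return { host, port: Number(port) };
    });
}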