-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.java
95 lines (82 loc) · 2.78 KB
/
test.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/**
 * Annotation-driven page model for crawling one book on www.52bqg.com.
 *
 * <p>The fields are populated by the extraction annotations declared on them; after
 * extraction, {@link #afterProcess(Page)} turns the chapter links found on the page into
 * absolute URLs and queues them as further crawl targets. {@link #main(String[])} wires the
 * model into an {@code OOSpider} together with {@code AnnotationCsPipline}.
 */
public class AnnotationCs implements AfterExtractor {

    /** Entry page of the crawled book; also the seed URL used by {@link #main(String[])}. */
    private static final String BOOK_URL = "https://www.52bqg.com/book_112633";

    /** Prefix prepended to every extracted chapter link to make it absolute. */
    private static final String CHAPTER_URL_PREFIX = BOOK_URL + "/";

    /** Value selected by the XPath {@code //div[@id="content"]}. */
    @ExtractBy("//div[@id=\"content\"]")
    private String content;

    /**
     * Digits that follow {@code book_112633/} in the page URL.
     * NOTE(review): the book id is repeated in this regex and in {@link #BOOK_URL}; keep both
     * in sync when pointing the crawler at another book.
     */
    @ExtractByUrl(value = "(?<=book_112633/)\\d*")
    private String urlpageinfo;

    /**
     * Text of the page's {@code h1} element. Declared {@code notNull = true}; presumably the
     * framework discards pages where it is missing - confirm against the WebMagic docs.
     */
    @ExtractBy(value = "//h1/text()",notNull = true)
    private String title;

    /**
     * Link targets selected by {@code //div[@id='list']//dd/a/@href}. Holds the raw extracted
     * values until {@link #afterProcess(Page)} rewrites them in place with
     * {@link #CHAPTER_URL_PREFIX}.
     */
    @ExtractBy("//div[@id='list']//dd/a/@href")
    private List<String> url;

    /** The page URL matched by the catch-all pattern {@code ".*"}. */
    @ExtractByUrl(".*")
    private String danduurl;

    /** @return the chapter links (may be {@code null}; see {@link #afterProcess(Page)}) */
    public List<String> getUrl() {
        return url;
    }

    /** @param url the chapter links to store */
    public void setUrl(List<String> url) {
        this.url = url;
    }

    /** @return the URL of the page this instance was extracted from */
    public String getDanduurl() {
        return danduurl;
    }

    /** @param danduurl the page URL to store */
    public void setDanduurl(String danduurl) {
        this.danduurl = danduurl;
    }

    /** @return the extracted content of the {@code div#content} element */
    public String getContent() {
        return content;
    }

    /** @param content the page content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the digits extracted from the page URL after {@code book_112633/} */
    public String getUrlpageinfo() {
        return urlpageinfo;
    }

    /** @param urlpageinfo the URL-derived page identifier to store */
    public void setUrlpageinfo(String urlpageinfo) {
        this.urlpageinfo = urlpageinfo;
    }

    /** @return the extracted {@code h1} text */
    public String getTitle() {
        return title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Prefixes every extracted chapter link with {@link #CHAPTER_URL_PREFIX} and queues the
     * resulting URLs on {@code page} as new crawl targets.
     *
     * <p>The list held in {@link #url} is rewritten in place, so {@link #getUrl()} returns
     * the prefixed URLs afterwards. Nothing happens when no link list was extracted.
     *
     * @param page the page this model was extracted from; receives the target requests
     */
    @Override
    public void afterProcess(Page page) {
        if (url == null) {
            return;
        }
        // Replace each element through its index; set() is not a structural modification.
        for (int i = 0; i < url.size(); i++) {
            url.set(i, CHAPTER_URL_PREFIX + url.get(i));
        }
        page.addTargetRequests(url);
        // A disabled "missing chapter" check used to live here: on the book's entry page it
        // asked IXiaoshuoMapper.findurl(...) about every chapter URL and printed those for
        // which it returned 0. If it is revived, compare the page URL with equals(), not ==.
    }

    /**
     * Starts the crawl: seeds the spider with {@link #BOOK_URL}, runs it on 6 threads with a
     * sleep time of 1000 between requests, and de-duplicates URLs through a
     * {@code BloomFilterDuplicateRemover} constructed with 1000.
     * NOTE(review): 1000 is presumably the expected number of URLs - confirm it is large
     * enough for the book being crawled.
     *
     * @param args unused
     * @throws IOException declared for callers; nothing in this body is seen to throw it
     */
    public static void main(String[] args) throws IOException {
        QueueScheduler scheduler = new QueueScheduler();
        scheduler.setDuplicateRemover(new BloomFilterDuplicateRemover(1000));

        OOSpider.create(Site.me().setSleepTime(1000)
                , new AnnotationCsPipline(), AnnotationCs.class)
                .addUrl(BOOK_URL)
                .thread(6)
                .setScheduler(scheduler)
                .run();
    }
}