-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathcrawler.js
69 lines (59 loc) · 1.52 KB
/
crawler.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/**!
* haixiu - crawler.js
*
*/
'use strict';
/**
* Module dependencies.
*/
const Douban = require('./lib/douban');
const config = require('./config');
const model = require('./model');
const _ = require('lodash');
const co = require('co');
// Douban client (class loaded from ./lib/douban), constructed with the API
// key taken from ./config.
const DB = new Douban({
apikey: config.apikey,
});
// Model used below to look up and upsert crawled posts.
const Post = model.Post;
/**
 * Report a failure raised during a crawl run.
 *
 * Writes the stack trace with `console.error` and then the error value itself
 * with `console.log`.
 *
 * @param {*} err - Rejection reason; normally an Error, but a promise may
 *   reject with any value, including `null` or `undefined`.
 */
function onerror(err) {
  // Reading `.stack` off null/undefined would throw inside the rejection
  // handler and mask the original failure, so pass those values through as-is.
  const stack = err === undefined || err === null ? err : err.stack;
  console.error(stack);
  console.log(err);
}
/**
 * Store one crawled topic as a post, inserting it or updating the existing
 * record with the same id.
 *
 * Image URLs taken from `topic.photos[].alt` are unioned with the `imgs`
 * already stored for that id, so previously saved images are kept.
 *
 * Generator function: it yields the `exec()` results of the `Post` queries and
 * is meant to be driven by a coroutine runner.
 *
 * @param {Object} topic - Topic record. Reads `id`, `title`, `photos` and
 *   `authorInfo` (`id`, `name`, `alt`, `loc_name`).
 * @returns {*} Whatever the upsert resolves to, or `undefined` when the topic
 *   has no id and is skipped.
 */
function* handleTopic(topic) {
  const source = topic || {};
  const topicId = source.id;
  if (topicId === undefined || topicId === null) {
    // Without an id the record would be keyed on `undefined` and get the URL
    // ".../topic/undefined/", so there is nothing meaningful to store.
    return undefined;
  }

  // The author lookup may have produced nothing; fall back to an empty record
  // instead of throwing on the property reads below.
  const authorInfo = source.authorInfo || {};

  // NOTE(review): `_.pluck` exists in lodash 3 but was removed in lodash 4;
  // confirm the pinned lodash version before upgrading.
  let imgs = _.pluck(source.photos, 'alt');
  const exists = yield Post.findOne({id: topicId}).exec();
  if (exists) {
    imgs = _.union(imgs, exists.imgs);
  }

  const post = {
    id: topicId,
    url: `http://www.douban.com/group/topic/${topicId}/`,
    title: source.title,
    imgs: imgs,
    author_id: authorInfo.id,
    author_name: authorInfo.name,
    author_url: authorInfo.alt,
    author_location: authorInfo.loc_name || '',
    update_at: new Date(),
  };
  return yield Post.update({id: topicId}, post, {upsert: true}).exec();
}
/**
 * Run one crawl pass over the configured group.
 *
 * Walks pages 1 through `config.fetchPage` of `config.groupName`, attaches the
 * author record to every topic and hands each topic to `handleTopic`. Any
 * error thrown along the way ends the pass and is reported through `onerror`.
 */
function fetchHaixiuzu() {
  // NOTE(review): the results of DB.groupTopic and DB.user are used directly
  // rather than yielded, so both must return their data synchronously —
  // confirm against ./lib/douban.
  const crawlPages = function* () {
    let page = 1;
    while (page <= config.fetchPage) {
      const topics = DB.groupTopic(config.groupName, page);
      for (let idx = 0; idx < topics.length; idx += 1) {
        const current = topics[idx];
        const author = current.author || {};
        current.authorInfo = DB.user(author.id);
        yield handleTopic(current);
      }
      page += 1;
    }
  };

  co(crawlPages).catch(onerror);
}
exports.start = function () {
fetchHaixiuzu();
// 每10分钟运行一次
setInterval(fetchHaixiuzu, 10 * 60 * 1000);
};