-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
85 lines (67 loc) · 2.42 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
'use strict';
var EventEmitter = require('events'),
_http = require('http'),
https = require('https'),
urlmod = require('url'),
mf2 = require('microformat-node'),
concat = require('concat-stream');
module.exports = function crawlMf2(baseUrl) {
var emitter = new EventEmitter();
emitter.on('urlDisco', function(url) {
var useHTTP = urlmod.parse(url).protocol === 'http:',
http = useHTTP ? _http : https;
var req = http.get(url);
req.on('error', err => emitter.emit('error', err));
req.on('response', function(res) {
// XXX handle non-2xx responses
res.pipe(concat(function(buf) {
mf2.get({html: buf.toString()}, function(err, data) {
if (err) {
emitter.emit('error', err);
return;
}
data.items.forEach(node => emitter.emit('mf2Parse', url, node));
});
}));
});
});
emitter.on('mf2Parse', function(url, node) {
// XXX don't hardcode that it's the first node
var type = node.type[0];
if (type === 'h-feed') {
emitter.emit('h-feed', url, node);
} else if (type === 'h-entry') {
emitter.emit('h-entry', url, node);
}
// XXX should we do something else if the type is unknown?
});
// XXX recognize infinite recursion, when the post links to itself
emitter.on('h-feed', function(url, node) {
node.children.forEach(function(child) {
// XXX check that the child is an h-entry?
// XXX don't assume the child has a URL
var postUrl = child.properties.url;
// XXX my original code had this and I honestly don't know why??
if (postUrl.length === 1) {
var resolvedUrl = urlmod.resolve(url, postUrl[0]);
emitter.emit('urlDisco', resolvedUrl);
}
});
});
emitter.on('h-entry', function(url, node) {
});
emitter.emit('urlDisco', baseUrl);
return emitter;
};