This repository has been archived by the owner on Jan 3, 2020. It is now read-only.
forked from zotero/translators
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathThe Hindu (old).js
129 lines (121 loc) · 3.43 KB
/
The Hindu (old).js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
{
"translatorID": "9499c586-d672-42d6-9ec4-ee9594dcc571",
"label": "The Hindu (old)",
"creator": "Prashant Iyengar and Michael Berkowitz",
"target": "^https?://(www\\.)?hindu\\.com/",
"minVersion": "1.0.0b4.r5",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2015-06-02 21:30:30"
}
/**
 * Tells Zotero what kind of page this is.
 *
 * @param {Document} doc - page DOM; queried with an XPath for result links.
 * @param {string} url - page address (not consulted here).
 * @returns {string} "multiple" when at least one `h2.r > a.l` link is
 *   present, otherwise "newspaperArticle".
 */
function detectWeb(doc, url) {
	var resultLinks = doc.evaluate('//h2[@class="r"]/a[@class="l"]', doc, null, XPathResult.ANY_TYPE, null);
	// A single hit is enough to treat the page as a list of results.
	return resultLinks.iterateNext() ? "multiple" : "newspaperArticle";
}
/**
 * Reads one `NAME="..." CONTENT="..."` pair out of a META tag string and,
 * for the names this translator cares about, stores the first
 * whitespace-delimited word of the content on the item:
 *
 *   PAGEHEAD   -> item.section
 *   ZONE       -> item.place
 *   EXPORTTIME -> item.date
 *   PAGENUMBER -> item.pages
 *
 * Strings that do not match, and any other NAME, leave the item untouched.
 *
 * @param {string} str - text of a single META tag.
 * @param {Object} item - object that receives the extracted field.
 */
function regexMeta(str, item) {
	var pairPattern = /NAME\=\"([\w\W]*?)\"\s+CONTENT\=\"([\w\W]*?)\"/;
	var pair = str.match(pairPattern);
	if (!pair) {
		return;
	}

	var metaName = pair[1];
	// Only the leading word of the content is kept.
	var firstWord = pair[2].split(/\s+/)[0];

	switch (metaName) {
		case "PAGEHEAD":
			item.section = firstWord;
			break;
		case "ZONE":
			item.place = firstWord;
			break;
		case "EXPORTTIME":
			item.date = firstWord;
			break;
		case "PAGENUMBER":
			item.pages = firstWord;
			break;
	}
}
/**
 * Builds one newspaperArticle item from the raw HTML of an article page and
 * passes it to Zotero with complete().
 *
 * @param {string} text - HTML source of the article page.
 * @param {string} art - URL the page was fetched from; used for the item's
 *   url and for the snapshot attachment.
 */
function scrapeHinduArticle(text, art) {
	var newItem = new Zotero.Item("newspaperArticle");
	newItem.publicationTitle = "The Hindu";
	newItem.url = art;

	// Headline: the text between the last colon inside <TITLE> and </TITLE.
	// Pages without a matching <TITLE> simply get no title instead of throwing.
	var titleMatch = text.match(/\<TITLE\>[\w\W]*\:([\w\W]*?)<\/TITLE/);
	if (titleMatch) {
		newItem.title = Zotero.Utilities.unescapeHTML(Zotero.Utilities.capitalizeTitle(titleMatch[1]));
	}

	// Byline: whatever sits between the "justify>" following the storyhead
	// font tag and the next <p>.
	var authMatch = text.match(/\<font class\=storyhead[\w\W]*?justify\>([\w\W]*?)\<p\>/);
	if (authMatch) {
		var cleanauth = Zotero.Utilities.cleanTags(authMatch[1]);
		newItem.creators.push(Zotero.Utilities.cleanAuthor(cleanauth, "author"));
	}

	newItem.attachments = [{"title":"The Hindu Snapshot", mimeType:"text/html", url:art}];

	// match() yields null when the page has no META tags; treat that as "none".
	var metaTags = text.match(/<META NAME[\w\W]*?\>/g) || [];
	for (var j = 0; j < metaTags.length; j++) {
		regexMeta(metaTags[j], newItem);
	}

	newItem.complete();
}

/**
 * Translator entry point. On a "multiple" page it collects the result links,
 * lets the user choose via Zotero.selectItems, and scrapes every chosen
 * article; otherwise it scrapes the current URL.
 *
 * Each request captures its own URL, so items saved from a multi-selection
 * carry the address they were actually fetched from, and Zotero.done() is
 * signalled once, after the last outstanding request has been handled.
 *
 * @param {Document} doc - DOM of the page the translator was invoked on.
 * @param {string} url - address of that page.
 */
function doWeb(doc, url) {
	var arts = [];
	if (detectWeb(doc, url) === "multiple") {
		var links = doc.evaluate('//h2[@class="r"]/a[@class="l"]', doc, null, XPathResult.ANY_TYPE, null);
		var link;
		var items = {};
		while ((link = links.iterateNext())) {
			items[link.href] = link.textContent;
		}
		// NOTE(review): relies on selectItems returning the chosen subset
		// synchronously (a falsy value yields no iterations) — confirm
		// against the targeted Zotero version.
		items = Zotero.selectItems(items);
		for (var href in items) {
			arts.push(href);
		}
	} else {
		arts = [url];
	}

	if (arts.length === 0) {
		return;
	}

	var pending = arts.length;

	// One function call per URL gives every callback its own `art` binding;
	// a shared loop variable would hold only the last URL by the time the
	// responses arrive.
	function fetchArticle(art) {
		Zotero.debug(art);
		Zotero.Utilities.HTTP.doGet(art, function(text) {
			try {
				scrapeHinduArticle(text, art);
			} finally {
				// Count the request as handled even if scraping threw, so the
				// translation is not left waiting forever.
				pending--;
				if (pending === 0) {
					Zotero.done();
				}
			}
		});
	}

	// Announce asynchronous work before starting any request.
	Zotero.wait();
	for (var i = 0; i < arts.length; i++) {
		fetchArticle(arts[i]);
	}
}/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.hindu.com/lr/2004/01/04/stories/2004010400030100.htm",
"items": [
{
"itemType": "newspaperArticle",
"creators": [
{
"firstName": "To be torn between two languages, discovers H. MASUD TAJ, is to drown soul-deep in the",
"lastName": "present",
"creatorType": "author"
}
],
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
{
"title": "The Hindu Snapshot",
"mimeType": "text/html"
}
],
"publicationTitle": "The Hindu",
"url": "http://www.hindu.com/lr/2004/01/04/stories/2004010400030100.htm",
"title": "Falling at the speed of light",
"date": "01-01-2004",
"pages": "01",
"place": "CHEN",
"section": "LITERARY",
"libraryCatalog": "The Hindu (old)"
}
]
}
]
/** END TEST CASES **/