(** Get RSS/Atom feeds and convert them to HTML *)
open Printf
open Nethtml
open Syndic
let planet_url = ref ""
let planet_full_url = ref ""
let planet_feeds_file = ref ""
(* Utils
***********************************************************************)
let failsafe_flag = ref false
let failsafe ~default f =
if !failsafe_flag then
try f () with _ -> default
else
f ()
(* [unique compare l] removes duplicates (w.r.t. [compare]) from [l],
   keeping the first occurrence of each element and preserving the order. *)
let unique (type x) (compare: x -> x -> int) l =
  let module S = Set.Make(struct type t = x let compare = compare end) in
  let rec loop s r = function
    | [] -> List.rev r
    | e::l ->
      if S.mem e s then
        loop s r l
      else
        loop (S.add e s) (e::r) l
  in
  loop S.empty [] l
let unique_most_recent_entries l =
  let module S =
    Set.Make(
      struct
        type t = Atom.entry
        let compare (a:t) (b:t) =
          Pervasives.compare a.Atom.id b.Atom.id
      end)
  in
  (* Keep, for each id, the most recently updated version of the entry. *)
  let rec loop s = function
    | [] ->
      (* Preserve the order of [l]; only the retained (physically stored)
         representative of each id passes the filter. *)
      List.filter (fun e -> try S.find e s == e with Not_found -> false) l
    | hd::tl ->
      match S.find hd s with
      | x ->
        (match Syndic_date.compare hd.Atom.updated x.Atom.updated with
         | n when n > 0 ->
           (* [hd] is more recent: replace the stored entry. *)
           loop (S.add hd (S.remove x s)) tl
         | _ -> loop s tl)
      | exception Not_found ->
        loop (S.add hd s) tl
  in
  loop S.empty l
type html = Nethtml.document list
let encode_html =
Netencoding.Html.encode ~prefer_name:false ~in_enc:`Enc_utf8 ()
let encode_document html = Nethtml.encode ~enc:`Enc_utf8 html
(* Remove all tags *)
let rec syndic_to_buffer b = function
| XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs
| XML.Data (_, d) -> Buffer.add_string b d
let syndic_to_string x =
let b = Buffer.create 1024 in
List.iter (syndic_to_buffer b) x;
Buffer.contents b
let string_of_text_construct : Atom.text_construct -> string = function
(* FIXME: we probably would like to parse the HTML and remove the tags *)
| Atom.Text s | Atom.Html(_,s) -> s
| Atom.Xhtml(_, x) -> syndic_to_string x
let rec resolve ?xmlbase html =
List.map (resolve_links_el ~xmlbase) html
and resolve_links_el ~xmlbase = function
| Nethtml.Element("a", attrs, sub) ->
let attrs = match List.partition (fun (t,_) -> t = "href") attrs with
| [], _ -> attrs
| (_, h) :: _, attrs ->
let src = Uri.to_string(XML.resolve xmlbase (Uri.of_string h)) in
("href", src) :: attrs in
Nethtml.Element("a", attrs, resolve ?xmlbase sub)
| Nethtml.Element("img", attrs, sub) ->
let attrs = match List.partition (fun (t,_) -> t = "src") attrs with
| [], _ -> attrs
| (_, src) :: _, attrs ->
let src = Uri.to_string(XML.resolve xmlbase (Uri.of_string src)) in
("src", src) :: attrs in
Nethtml.Element("img", attrs, sub)
| Nethtml.Element(e, attrs, sub) ->
Nethtml.Element(e, attrs, resolve ?xmlbase sub)
| Data _ as d -> d
(* Things that posts should not contain *)
let undesired_tags = ["style"; "script"]
let undesired_attr = ["id"]
let remove_undesired_attr =
List.filter (fun (a,_) -> not(List.mem a undesired_attr))
let rec remove_undesired_tags html =
Utils.filter_map html remove_undesired_tags_el
and remove_undesired_tags_el = function
| Nethtml.Element(t, a, sub) ->
if List.mem t undesired_tags then None
else Some(Nethtml.Element(t, remove_undesired_attr a,
remove_undesired_tags sub))
| Data _ as d -> Some d
let html_of_text ?xmlbase s =
try Nethtml.parse (new Netchannels.input_string s)
~dtd:Utils.relaxed_html40_dtd
|> resolve ?xmlbase
|> remove_undesired_tags
with _ ->
[Nethtml.Data(encode_html s)]
(* Do not trust sites using XML for HTML content. Convert to string
and parse back. (Does not always fix bad HTML unfortunately.) *)
let rec html_of_syndic =
let ns_prefix _ = Some "" in
fun ?xmlbase h ->
html_of_text ?xmlbase
(String.concat "" (List.map (XML.to_string ~ns_prefix) h))
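(* Illustration: an entry delivering its content as [Xhtml(xmlbase, nodes)]
   has [nodes] serialized back to a string with [XML.to_string] and
   re-parsed by [html_of_text], so the markup goes through Nethtml's
   relaxed HTML 4.0 DTD (and through the same link resolution and tag
   filtering) instead of being trusted as-is. *)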
(* Feeds
***********************************************************************)
(* Email on the forge contains the name in parentheses *)
let email_name_re =
Str.regexp " *\\([a-zA-Z.]+@[a-zA-Z.]+\\) *(\\([^()]*\\))"
let author_email_name (a: Atom.author) =
let open Atom in
if Str.string_match email_name_re a.name 0 then
let name = String.trim(Str.matched_group 2 a.name) in
let email = match a.email with
| None -> Some(Str.matched_group 1 a.name)
| Some _ -> a.email in
{ a with name; email }
else a
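(* Illustration (hypothetical address): for an author created as
   [Atom.author "jane.doe@example.org (Jane Doe)"], [author_email_name]
   returns an author with [name = "Jane Doe"] and
   [email = Some "jane.doe@example.org"].  Names that do not match
   [email_name_re] are left untouched. *)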
let special_processing (e: Atom.entry) =
let open Atom in
let a0, a = e.authors in
if a0.name = "OCaml Weekly News" then
{ e with title = Text "Weekly News" }
else
{ e with authors = (author_email_name a0,
List.map author_email_name a) }
(* Atom feed (with no entries) representing a broken feed.  The title
   is the reason for the failure.  Since these feeds contain no
   entries, the aggregation will remove them. *)
let broken_feed name url reason =
let feed = Atom.feed ~id:(Uri.of_string(Digest.to_hex(Digest.string name)))
~authors:[Atom.author name]
~title:(Atom.Text reason)
~updated:(CalendarLib.Calendar.now())
[] in
(* See Syndic.Opml1.of_atom for the convention on the length. *)
Atom.set_self_link feed url ~length:(-1)
module Atom = struct
include Atom
let empty = {
authors = [];
categories = [];
contributors = [];
generator = None;
icon = None;
id = Uri.of_string"empty" ;
links = [];
logo = None;
rights = None;
subtitle = None;
title = Text "";
updated = CalendarLib.Calendar.now();
entries = [];
}
end
let feed_of_url ~name url =
failsafe ~default:Atom.empty (fun () ->
try
let xml = `String(0, Http.get(Uri.to_string url)) in
let feed =
try Atom.parse ~self:url ~xmlbase:url (Xmlm.make_input xml)
with Atom.Error.Error _ ->
Rss2.parse ~xmlbase:url (Xmlm.make_input xml)
|> Rss2.to_atom ~self:url in
let feed = Atom.set_main_author feed (Atom.author name) in
{ feed
with
Atom.entries =
unique Pervasives.compare
(List.map special_processing feed.Atom.entries) }
with
| Rss2.Error.Error _ ->
broken_feed name url "Neither an Atom nor a RSS2 feed"
| Nethttp_client.Http_protocol(Nethttp_client.Timeout s)
| Nethttp_client.Http_protocol(Nethttp_client.Name_resolution_error s) ->
broken_feed name url s
| Nethttp_client.Http_protocol Nethttp_client.Too_many_redirections ->
broken_feed name url "Too many redirections"
| Nethttp_client.Http_protocol e ->
broken_feed name url (Printexc.to_string e)
| Nethttp_client.Http_error(err, _) ->
let msg = Nethttp.(string_of_http_status (http_status_of_int err)) in
broken_feed name url msg
)
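(* The feeds list file contains one feed per line in the form "Name|URL";
   each line is split at the first '|' and lines without a '|' are
   skipped.  A hypothetical example line:

     Jane's OCaml blog|https://blog.example.org/feed.atom
*)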
let planet_feeds =
let add_feed acc line =
try
let i = String.index line '|' in
let name = String.sub line 0 i in
let url = String.sub line (i+1) (String.length line - i - 1) in
feed_of_url ~name (Uri.of_string url) :: acc
with Not_found -> acc in
ref(
lazy(
if !planet_feeds_file <> "" then
List.fold_left add_feed [] (Utils.lines_of_file !planet_feeds_file)
else
[]
)
)
let add_feed name url =
failsafe
~default:()
(fun () ->
let p = !planet_feeds in
planet_feeds := lazy (
let p = Lazy.force p in
failsafe ~default:p
(fun () -> feed_of_url ~name (Uri.of_string url) :: p)
)
)
let get_opml () =
let feeds = Lazy.force !planet_feeds in
let date_modified =
List.fold_left (fun d f -> Date.max d f.Atom.updated) Date.epoch feeds in
let head = Syndic.Opml1.head ~date_modified
~owner_name:"ocaml.org"
~owner_email:"[email protected]"
"OCaml Planet" in
(* Broken feeds will be marked with [is_comment = true]. *)
let opml = Opml1.of_atom ~head feeds in
(* Sort by name. (FIXME: one may want to ignore spaces.) *)
let by_name o1 o2 = String.compare o1.Opml1.text o2.Opml1.text in
{ opml with Opml1.body = List.sort by_name opml.Opml1.body }
let opml fname =
Opml1.write (get_opml()) fname
let html_contributors () =
let open Opml1 in
let contrib_html (o: Opml1.outline) =
match o.xml_url with
| Some xml_url ->
let attrs = ("href", Uri.to_string xml_url)
:: List.map (fun ((_,n), v) -> (n,v)) o.attrs in
let attrs = if o.is_comment then ("class", "broken") :: attrs
else attrs in
Element("li", [], [Element("a", attrs, [Data o.text])])
| None -> Element("li", [], [Data o.text]) in
[Element("ul", [], List.map contrib_html (get_opml()).body)]
(* Blog feed
***********************************************************************)
let digest_post (e: Atom.entry) =
Digest.to_hex (Digest.string (Uri.to_string(e.Atom.id)))
let get_alternate_link (e: Atom.entry) =
let open Atom in
try Some (List.find (fun l -> l.rel = Alternate) e.links).href
with Not_found -> match e.links with
| l :: _ -> Some l.href
| [] -> None
let get_feed_url (e: Atom.entry) =
let open Atom in
match e.source with
| None -> None
| Some s ->
try Some(List.find (fun l -> l.rel = Self) s.links).href
with Not_found -> None
let get_date (e: Atom.entry) =
match e.Atom.published with
| Some _ -> e.Atom.published
| None -> Some e.Atom.updated
let get_story (e: Atom.entry) : html =
let open Atom in
match e.content with
| Some(Text s) -> html_of_text s
| Some(Html(xmlbase, s)) -> html_of_text ?xmlbase s
| Some(Xhtml(xmlbase, h)) -> html_of_syndic ?xmlbase h
| Some(Mime _) | Some(Src _)
| None ->
match e.summary with
| Some(Text s) -> html_of_text s
| Some(Html(xmlbase, s)) -> html_of_text ?xmlbase s
| Some(Xhtml(xmlbase, h)) -> html_of_syndic ?xmlbase h
| None -> []
(* Limit the length of the description presented to the reader. *)
let rec length_html html =
List.fold_left (fun l h -> l + length_html_el h) 0 html
and length_html_el = function
| Element(_, _, content) -> length_html content
| Data d -> String.length d
let rec text_of_html html =
String.concat "" (List.map text_of_el html)
and text_of_el = function
| Element(_, _, content) -> text_of_html content
| Data d -> d
let rec len_prefix_of_html html len =
if len <= 0 then 0, []
else match html with
| [] -> len, []
| el :: tl -> let len, prefix_el = len_prefix_of_el el len in
let len, prefix_tl = len_prefix_of_html tl len in
len, prefix_el :: prefix_tl
and len_prefix_of_el el len =
match el with
| Data d ->
let len' = len - String.length d in
len', (if len' >= 0 then el else Data(String.sub d 0 len ^ "…"))
| Element(tag, args, content) ->
(* Remove "id" and "name" to avoid duplicate anchors with the
whole post. *)
let args = List.filter (fun (n,_) -> n <> "id" && n <> "name") args in
let len, prefix_content = len_prefix_of_html content len in
len, Element(tag, args, prefix_content)
let rec prefix_of_html html len =
snd(len_prefix_of_html html len)
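(* Illustration: [prefix_of_html [Element("p", [], [Data "Hello world"])] 5]
   gives [[Element("p", [], [Data "Hello…"])]]: the text is cut after 5
   characters, an ellipsis is appended, and "id"/"name" attributes are
   dropped from the truncated copy. *)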
let new_id =
let id = ref 0 in
fun () -> incr id; sprintf "ocamlorg-post%i" !id
(* [toggle html1 html2] returns a piece of HTML with buttons to switch
   from [html1] to [html2] and vice versa. *)
let toggle ?(anchor="") html1 html2 =
let button id1 id2 text =
Element("a", ["onclick", sprintf "switchContent('%s','%s')" id1 id2;
"class", "btn planet-toggle";
"href", "#" ^ anchor],
[Data text])
in
let id1 = new_id() and id2 = new_id() in
[Element("div", ["id", id1],
html1 @ [button id1 id2 "Read more..."]);
Element("div", ["id", id2; "style", "display: none"],
html2 @ [button id2 id1 "Hide"])]
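(* Illustration: [toggle ~anchor:"a42" short full] (with [short] and
   [full] two pieces of HTML) produces two <div>s with fresh ids from
   [new_id]: the first shows [short] followed by a "Read more..."
   button, the second is hidden ("display: none") and shows [full]
   followed by a "Hide" button.  Both buttons call [switchContent]
   (defined in [toggle_script] below) and link back to "#a42". *)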
let toggle_script =
let script =
"function switchContent(id1,id2) {
// Get the DOM reference
var contentId1 = document.getElementById(id1);
var contentId2 = document.getElementById(id2);
// Toggle
contentId1.style.display = \"none\";
contentId2.style.display = \"block\";
}\n" in
[Element("script", ["type", "text/javascript"], [Data script])]
let html_of_author (a: Atom.author) =
let open Atom in
match a.email with
| None | Some "" -> Data a.name
| Some email -> Element("a", ["href", "mailto:" ^ email], [Data a.name])
let rec html_of_authors = function
| [] -> []
| [a] -> [html_of_author a]
| a :: tl -> html_of_author a :: Data ", " :: html_of_authors tl
(* In addition to the feed name, print the entry author's name (useful
   for general feeds shared by several authors). *)
let keep_entry_author feed_author (a: Atom.author) =
let open Atom in
a.name <> "" && a.name <> feed_author
(* FIXME: maybe we want to be more subtle by checking for word boundaries: *)
&& not(Utils.KMP.is_substring ~pat:a.name feed_author)
&& not(Utils.KMP.is_substring ~pat:feed_author a.name)
let html_author_of_post (e: Atom.entry) =
let open Atom in
(* Only use the first source author (the one we set); the subsequent
   feed authors are basically duplicates. *)
let feed_author = match e.source with
| Some s -> (match s.authors with a0 :: _ -> Some a0
| [] -> None)
| None -> None in
let a0, a = e.authors in
let authors = match feed_author with
| Some feed_a -> List.filter (keep_entry_author feed_a.name) (a0 :: a)
| None -> a0 :: a in
match authors, feed_author with
| _ :: _, Some feed_author ->
html_of_author feed_author
:: Data " (" :: html_of_authors authors @ [Data ")" ]
| _ :: _, None -> html_of_authors authors
| [], Some feed_author -> [html_of_author feed_author]
| [], None -> []
let html_date_of_post e =
match get_date e with
| None -> []
| Some d ->
let date =
let open Syndic.Date in
sprintf "%s %02d, %d" (string_of_month(month d)) (day d) (year d) in
[Data date]
(* Transform a post [e] (i.e. a story) into HTML. *)
let html_of_post e =
let title_anchor = digest_post e in
let title = string_of_text_construct e.Atom.title in
let html_title, share = match get_alternate_link e with
| None -> [Data title], []
| Some u ->
let url_orig = Uri.to_string u in
let a_args = ["href", url_orig; "target", "_blank";
"title", "Go to the original post"] in
let post =
Netencoding.Url.encode (!planet_full_url ^ "#" ^ title_anchor) in
let google = ["href", "https://plus.google.com/share?url="
^ (Netencoding.Url.encode url_orig);
"target", "_blank"; "title", "Share on Google+"] in
let fb = ["href", "https://www.facebook.com/share.php?u=" ^ post
^ "&t=" ^ (Netencoding.Url.encode title);
"target", "_blank"; "title", "Share on Facebook"] in
let tw = ["href", "https://twitter.com/intent/tweet?url=" ^ post
^ "&text=" ^ (Netencoding.Url.encode title);
"target", "_blank"; "title", "Share on Twitter"] in
let rss =
match get_feed_url e with
| Some feed ->
[Element("a", ["class", "rss"; "target", "_blank";
"title", "Original RSS feed";
"href", Uri.to_string feed],
[Element("img", ["src", "/img/rss.svg"; "alt", "RSS";
"class", "svg"], []);
Element("img", ["src", "/img/rss.png"; "alt", "RSS";
"class", "png"], [])] )]
| None -> [] in
[Element("a", a_args, [Data(string_of_text_construct e.Atom.title)]) ],
[Element("span", ["class", "share"],
Element("a", a_args,
[Element("img", ["src", "/img/chain-link-icon.png";
"alt", ""], []) ])
:: Element("a", ("class", "googleplus") :: google,
[Element("img", ["src", "/img/googleplus.png";
"alt", "Google+"], []) ])
:: Element("a", ("class", "facebook") :: fb,
[Element("img", ["src", "/img/facebook.png";
"alt", "FB"], []) ])
:: Element("a", ("class", "twitter") :: tw,
[Element("img", ["src", "/img/twitter.png";
"alt", "Twitter"], []) ])
:: rss) ] in
let sep = Data " — " in
let additional_info = match html_author_of_post e, html_date_of_post e with
| [], [] -> []
| html_author, [] -> sep :: html_author
| [], date -> sep :: date
| html_author, date -> sep :: (html_author @ (Data ", " :: date)) in
let additional_info =
[Element("span", ["class", "additional-info"], additional_info)] in
let story = get_story e in
let story =
if length_html story < 500 then story
else toggle (prefix_of_html story 500) story ~anchor:title_anchor
in
[Data "\n";
Element("a", ["name", title_anchor], []);
Element("section", ["style", "clear: both"],
[Element("h1", ["class", "ruled planet"],
share @ html_title @ additional_info);
Element("div", ["class", "planet-post"], story)]);
Data "\n"]
let li_of_post (e: Atom.entry) =
let sep = Data " — " in
let title = string_of_text_construct e.Atom.title in
let title = match get_alternate_link e with
| None -> [Data title]
| Some u -> [Element("a", ["href", Uri.to_string u; "target", "_blank";
"title", "Go to the original post"],
[Data title]) ] in
let line = match html_author_of_post e, html_date_of_post e with
| [], [] -> title
| html_author, [] -> title @ (sep :: html_author)
| [], date -> date @ (Data "," :: title)
| html_author, date ->
date @ (Data ", " :: title @ (sep :: html_author)) in
Element("li", [], line)
let netdate_of_calendar d =
let month =
let open Syndic.Date in
match month d with
| Jan -> 1 | Feb -> 2 | Mar -> 3 | Apr -> 4 | May -> 5 | Jun -> 6
| Jul -> 7 | Aug -> 8 | Sep -> 9 | Oct -> 10 | Nov -> 11 | Dec -> 12 in
{ Netdate.year = Syndic.Date.year d;
month;
day = Syndic.Date.day d;
hour = Syndic.Date.hour d;
minute = Syndic.Date.minute d;
second = truncate(Syndic.Date.second d);
nanos = 0; zone = 0; week_day = -1 }
(* Similar to [html_of_post] but tailored to be shown in a list of
news (only titles are shown, linked to the page with the full story). *)
let headline_of_post ?(planet=false) ?(img_alt="") ~l9n ~img e =
let link =
if planet then !planet_url ^ "#" ^ digest_post e
else match get_alternate_link e with
| Some l -> Uri.to_string l
| None -> "" in
let html_icon =
[Element("a", ["href", link],
[Element("img", ["src", img ^ ".svg"; "class", "svg";
"alt", img_alt], []);
Element("img", ["src", img ^ ".png"; "class", "png";
"alt", img_alt], [])])] in
let html_date = match get_date e with
| None -> html_icon
| Some d ->
(* Netdate's internationalization functions are more developed than Syndic's. *)
let d =
let d = netdate_of_calendar d in
if Netdate.format ~fmt:"%x" d = Netdate.format ~fmt:"%x" d ~l9n then
(* English style *)
Netdate.format ~fmt:"%B %e, %Y" d ~l9n
else
Netdate.format ~fmt:"%e %B %Y" d ~l9n in
Element("p", [], [Data d]) :: html_icon in
let title = string_of_text_construct e.Atom.title in
let html_title =
Element("h1", [],
if link = "" then [Data title]
else [Element("a", ["href", link; "title", title],
[Data title])]) in
[Element("li", [], [Element("article", [], html_title :: html_date)]);
Data "\n"]
let rec remove n l =
if n <= 0 then l
else match l with [] -> []
| _ :: tl -> remove (n - 1) tl
let rec take n = function
| [] -> []
| e :: tl -> if n > 0 then e :: take (n-1) tl else []
let aggregated_feed =
lazy(Atom.aggregate (Lazy.force !planet_feeds)
~sort:`Newest_first
~title:(Atom.Text "OCaml Planet"))
let get_posts ?n ?(ofs=0) () =
let feed = Lazy.force aggregated_feed in
let entries = remove ofs feed.Atom.entries in
let entries = match n with
| None -> entries
| Some n -> take n entries in
{ feed with
Atom.entries =
unique
(fun (a:Atom.entry) (b:Atom.entry) ->
compare a.Syndic_atom.id b.Syndic_atom.id)
entries }
let headlines ?n ?ofs ?planet ~l9n () =
let posts = (get_posts ?n ?ofs ()).Atom.entries in
let img = "/img/news" in
[Element("ul", ["class", "news-feed"],
List.concat(List.map (headline_of_post ?planet ~l9n ~img) posts))]
let posts ?n ?ofs () =
let posts = (get_posts ?n ?ofs ()).Atom.entries in
[Element("div", [], List.concat(List.map html_of_post posts))]
let nposts () = List.length (get_posts ()).Atom.entries
let en_string_of_month =
let open Syndic.Date in
function
| Jan -> "January"
| Feb -> "February"
| Mar -> "March"
| Apr -> "April"
| May -> "May"
| Jun -> "June"
| Jul -> "July"
| Aug -> "August"
| Sep -> "September"
| Oct -> "October"
| Nov -> "November"
| Dec -> "December"
module Year_Month = struct
type t = int * Syndic.Date.month (* year, month *)
let compare ((y1, m1): t) ((y2, m2): t) =
let dy = compare y1 y2 in
if dy = 0 then compare m1 m2 else dy
end
module DMap = Map.Make(Year_Month)
let list_of_posts ?n ?ofs () =
let posts = (get_posts ?n ?ofs ()).Atom.entries in
(* Split posts per year/month *)
let classify m e =
match get_date e with
| None -> m (* drop *)
| Some d ->
let key = (Syndic.Date.year d, Syndic.Date.month d) in
let posts = try e :: DMap.find key m with Not_found -> [e] in
DMap.add key posts m in
let m = List.fold_left classify DMap.empty posts in
let add_html (year, month) posts html =
let title = en_string_of_month month ^ " " ^ string_of_int year in
let posts = List.rev posts in (* posts originally sorted by date *)
Element("h2", ["id", title], [Data title])
:: Element("ul", [], List.map li_of_post posts)
:: html in
(* Older date considered first => final HTML has more recent dates first *)
DMap.fold add_html m []
(* Aggregation of posts
***********************************************************************)
let aggregate ?n fname =
Atom.write (get_posts ?n ()) fname
(* Main
***********************************************************************)
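(* Example invocations (illustrative; the executable name depends on
   how the file is compiled):

     ./feeds2html --feeds-list-file planet_feeds.txt --posts > posts.html
     ./feeds2html --feeds-list-file planet_feeds.txt --headlines -n 10
     ./feeds2html --stdin --opml planet.opml < planet_feeds.txt

   HTML output goes to stdout; --opml and --aggregate write to the
   given file instead. *)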
let () =
let action = ref `Posts in
let n_posts = ref None in (* means unlimited *)
let ofs_posts = ref 0 in
let l9n = ref Netdate.posix_l9n in
let specs = [
("--feeds-list-file", Arg.String(fun s -> planet_feeds_file := s),
" Set the file that contains the list of feeds)");
("--stdin", Arg.Unit(fun () -> planet_feeds_file := "/dev/stdin"),
" Set stdin as the file that contains the list of feeds)");
("--headlines", Arg.Unit(fun () -> action := `Headlines),
" Feeds to feed summary (in HTML)");
("--subscribers", Arg.Unit(fun () -> action := `Subscribers),
" list of subscribers (rendered to HTML if alone)");
("--posts", Arg.Unit(fun () -> action := `Posts),
" Feeds to HTML (default action)");
("--list", Arg.Unit(fun () -> action := `List),
" Feeds to a single HTML");
("--nposts", Arg.Unit(fun () -> action := `NPosts),
" number of posts in the feed");
("--opml", Arg.String(fun fn -> action := `Opml fn),
"fname write an OMPL document to the given file");
("--aggregate", Arg.String(fun fn -> action := `Aggregate fn),
"fname write the aggregated feed to the given file");
("-n", Arg.Int(fun n -> n_posts := Some n),
"n limit the number of posts to n (default: all of them)");
("--ofs", Arg.Set_int ofs_posts,
"n start at the n th post (first is numbered 0)");
("--locale",
Arg.String(fun l -> l9n := Netdate.(l9n_from_locale l)),
"l Translate dates for the locale l");
("--failsafe", Arg.Set(failsafe_flag),
" Activate failsafe mode");
]
in
let anon_arg s =
add_feed s s
in
Arg.parse (Arg.align specs) anon_arg "RSS and Atom feeds to HTML";
let l9n = Netdate.compile_l9n !l9n in
let out = new Netchannels.output_channel stdout in
(match !action with
| `Headlines ->
Nethtml.write out (headlines ~planet:true ?n:!n_posts ~ofs:!ofs_posts
~l9n ())
| `Posts -> Nethtml.write out (toggle_script
@ posts ?n:!n_posts ~ofs:!ofs_posts ())
| `List -> Nethtml.write out (list_of_posts ?n:!n_posts ~ofs:!ofs_posts ())
| `NPosts -> printf "%i" (nposts())
| `Subscribers -> Nethtml.write out (html_contributors())
| `Opml fn -> opml fn
| `Aggregate fn -> aggregate fn ?n:!n_posts
);
print_newline();
out#close_out()
(* Local Variables: *)
(* compile-command: "ocamlfind ocamlopt.opt -package netstring,nettls-gnutls,netclient,syndic -linkpkg utils.ml http.ml feeds2html.ml -o rss" *)
(* End: *)