-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrss-o-matic.rb
executable file
·108 lines (90 loc) · 3.85 KB
/
rss-o-matic.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/local/bin/ruby
# -*- encoding: utf-8 -*-
#encoding=utf-8
require 'rubygems'
require 'open-uri'
require 'erb'
require 'sanitize'
require 'json'
# Prints a tagged status line to standard output in the form "TAG: message".
#
# tag     - short label for the stage emitting the message (e.g. "WRITE").
# message - text to print; non-String values are rendered via to_s.
def log(tag, message)
  # Interpolation rather than String#+ so that a non-String argument
  # (a number, an exception object) is converted instead of raising TypeError.
  puts "#{tag}: #{message}"
end
# URLs of the RSS feeds to process, one per line.
# The order is significant: feeds are visited in this order by the script.
# (No comments inside the literal: within %w[] a "#" would become an element.)
rss_feeds = %w[
  http://rss.slashdot.org/Slashdot/slashdot
  http://www.jconline.com/apps/pbcs.dll/section?category=news%25&template=rss
  http://www.jconline.com/apps/pbcs.dll/section?category=sports%25&template=rss
  http://news.google.com/news?pz=1&jfkl=true&cf=all&ned=us&hl=en&topic=w&output=rss
  http://news.google.com/news?pz=1&jfkl=true&cf=all&ned=us&hl=en&topic=n&output=rss
  http://news.google.com/news?q=West+Lafayette,+Indiana&hl=en&safe=active&client=firefox-a&hs=ozw&rls=org.mozilla:en-US:official&bav=on.2,or.r_gc.r_pw.&biw=1280&bih=761&um=1&ie=UTF-8&output=rss
  http://feeds.reuters.com/news/artsculture
  http://feeds.reuters.com/reuters/businessNews
  http://feeds.reuters.com/ReutersBusinessTravel
  http://feeds.reuters.com/reuters/companyNews
  http://feeds.reuters.com/Counterparties
  http://feeds.reuters.com/reuters/Election2012
  http://feeds.reuters.com/reuters/entertainment
  http://feeds.reuters.com/reuters/environment
  http://feeds.reuters.com/reuters/healthNews
  http://feeds.reuters.com/reuters/lifestyle
  http://feeds.reuters.com/news/reutersmedia
  http://feeds.reuters.com/news/wealth
  http://feeds.reuters.com/reuters/MostRead
  http://feeds.reuters.com/reuters/oddlyEnoughNews
  http://feeds.reuters.com/ReutersPictures
  http://feeds.reuters.com/reuters/peopleNews
  http://feeds.reuters.com/Reuters/PoliticsNews
  http://feeds.reuters.com/reuters/scienceNews
  http://feeds.reuters.com/reuters/sportsNews
  http://feeds.reuters.com/reuters/technologyNews
  http://feeds.reuters.com/reuters/topNews
  http://feeds.reuters.com/Reuters/domesticNews
  http://feeds.reuters.com/Reuters/worldNews
]
# Main script body: for every feed in rss_feeds, download and parse the RSS,
# drop items that trip the word filter, and render the remaining items
# through ./views/rss-o-matic.html.erb into
# ./public-rss-o-matic/index-<n>.html, where <n> counts the feeds that
# produced at least one item.

# Substrings that cause an item to be omitted from the generated page.
# NOTE: matching is by plain substring, not by whole word, so unrelated words
# that merely contain one of these (e.g. "Essex", "grapes") are filtered too.
NAUGHTY_WORDS = %w[porn sex fuck shit rape damn].freeze # add any more naughty words later...

# Returns the text content of the first node under +node+ matching
# +selector+, or +default+ when nothing matches.
def first_text(node, selector, default = "")
  match = node.search(selector).first
  match ? match.content : default
end

# Removes markup tags from +html+ and returns the text that was between them.
def strip_tags(html)
  # [^>]* stops at the end of each tag. The previous greedy /<.*>/m ran from
  # the first "<" to the last ">" and discarded all the text in between.
  html.gsub(/<[^>]*>/, "")
end

# True when +text+ contains any entry of NAUGHTY_WORDS, ignoring case.
def naughty?(text)
  lowered = text.downcase
  NAUGHTY_WORDS.any? { |word| lowered.include?(word) }
end

# Appends +error+ (class, message and backtrace) to ./error.log and echoes
# it to standard output through log under +tag+.
def report_error(tag, error)
  backtrace = (error.backtrace || []).join("\n")
  File.open('./error.log', 'a') do |f|
    # puts (not <<) so consecutive entries do not run together on one line.
    f.puts "#{error.class}: #{error.message}"
    f.puts backtrace unless backtrace.empty?
  end
  log(tag, "ERROR: \"#{error.message}\"!")
  puts backtrace unless backtrace.empty?
  log(tag, "END ERROR")
end

html_counter = 0
rss_feeds.each do |rss_feed|
  # NOTE(review): Nokogiri is not required explicitly in this file; presumably
  # it is loaded as a dependency of the sanitize gem - confirm.
  begin
    # URI#open (provided by open-uri) rather than Kernel#open: Kernel#open no
    # longer accepts URLs on Ruby 3.0+. The block form closes the handle.
    rss = URI.parse(rss_feed).open { |io| Nokogiri::XML(io) }
  rescue StandardError => e
    # One unreachable feed must not abort the remaining feeds.
    log("FETCH", "Skipping #{rss_feed}")
    report_error("FETCH", e)
    next
  end

  # Instance variables are kept because the ERB template is evaluated with
  # this scope's binding and may refer to them.
  # Falls back to the placeholder when the feed has no <channel><title>.
  @rss_feed_title = first_text(rss, "channel > title", "[RSS FEED TITLE]")

  @rss_items = []
  rss.search("item").each do |item|
    # A missing <title> or <description> is treated as empty text.
    item_title = first_text(item, "title")
    item_description = strip_tags(first_text(item, "description"))

    # naughty word filtering
    if naughty?(item_title + item_description)
      log("ITEM_GEN", "Omitting:\n#{item_title}\n\t#{item_description}\n\tbecause we found a naughty word.")
    else
      @rss_items << { :title => item_title, :description => item_description }
    end
  end

  # Feeds with nothing left to show do not get a page (or a page number).
  next if @rss_items.empty?

  html_counter += 1
  begin
    log("WRITE", "Reading the template")
    @template = File.read('./views/rss-o-matic.html.erb')
    log("WRITE", "ERB-ifying the template")
    template = ERB.new @template
    log("WRITE", "Writing the file")
    # Render before opening the output file: opening with 'w' truncates it,
    # so a template error would otherwise leave an empty page behind.
    html = template.result(binding)
    File.open("./public-rss-o-matic/index-#{html_counter}.html", 'w') { |f| f << html }
  rescue StandardError => e
    report_error("WRITE", e)
  end
end