-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathquote_scraper.rb
39 lines (29 loc) · 872 Bytes
/
quote_scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
require 'nokogiri'
require 'open-uri'
require 'json'
# Scrapes the Goodreads quotes listing and dumps the short quotes to
# `quote_data.json`.
#
# Each kept quote is serialised as the string `mq('<text>', '<author>')`, and
# the file contains a JSON array of those strings.
class QuoteScraper
  # Listing URL; the page number is appended directly to it.
  ROOT_URL = 'https://www.goodreads.com/quotes?page='.freeze
  # Number of listing pages downloaded by #initialize.
  PAGE_LIMIT = 25
  # Quotes whose text is this many characters or longer are discarded.
  QUOTE_CUTOFF = 280

  # Downloads and parses every listing page up front (network I/O happens here).
  #
  # Page numbers start at 1 and run through PAGE_LIMIT.
  # NOTE(review): assumes Goodreads pagination is 1-based, so `page=0` is not a
  # distinct page - confirm against the live site.
  def initialize
    @pages = (1..PAGE_LIMIT).map { |number| fetch_page(number) }
  end

  # Writes the JSON produced by #quote_data to `quote_data.json` in the
  # current working directory, replacing any existing file.
  #
  # @return [Integer] number of bytes written
  def gather_data
    File.write('quote_data.json', quote_data)
  end

  private

  # Fetches one listing page and parses it as HTML.
  #
  # The block form of URI.open closes the downloaded stream as soon as
  # parsing is done instead of leaving it open.
  #
  # @param number [Integer] page number appended to ROOT_URL
  # @return [Nokogiri::HTML::Document] the parsed page
  def fetch_page(number)
    URI.open("#{ROOT_URL}#{number}") { |io| Nokogiri::HTML(io) }
  end

  # Builds the JSON array of `mq(...)` strings for every kept quote, in page
  # order and then document order.
  #
  # @return [String] JSON text
  def quote_data
    entries = @pages.flat_map do |page|
      page.css('.quote').map { |quote| format_quote(quote) }.compact
    end
    entries.to_json
  end

  # Turns one `.quote` node into its `mq('<text>', '<author>')` string.
  #
  # @param quote [#search] a node answering Nokogiri-style `search`
  # @return [String, nil] the formatted entry, or nil when the quote has no
  #   text or is QUOTE_CUTOFF characters or longer
  def format_quote(quote)
    raw_text = quote.search('.quoteText > text()').text.strip
    # Keep only what precedes the first closing curly quote, then drop the
    # opening curly quote(s). `first.to_s` guards against an empty text node,
    # where `split` returns [] and `first` is nil.
    content = raw_text.split('”').first.to_s.delete('“')
    return if content.empty? || content.length >= QUOTE_CUTOFF

    # The author is whatever precedes the first comma; it may be empty.
    author = quote.search('.authorOrTitle > text()').text.split(',').first.to_s.strip

    # NOTE(review): content and author are interpolated unescaped, so an
    # apostrophe in either ends the single-quoted argument early if the
    # consumer evaluates this string - confirm whether escaping is needed.
    "mq('#{content}', '#{author}')"
  end
end
# Script entry point: build the scraper, then write its JSON output file.
scraper = QuoteScraper.new
scraper.gather_data