forked from lobsters/lobsters
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstory_cacher.rb
53 lines (41 loc) · 1.23 KB
/
story_cacher.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Fetches the extracted article text for a story's URL from the Diffbot
# article API.
class StoryCacher
  cattr_accessor :DIFFBOT_API_KEY

  # this needs to be overridden in config/initializers/production.rb
  @@DIFFBOT_API_KEY = nil

  DIFFBOT_API_URL = "http://www.diffbot.com/api/article".freeze

  # Seconds to wait on each outbound request. We're not doing this
  # interactively, so take a while.
  FETCH_TIMEOUT = 45

  # Asks Diffbot for the article text behind +story.url+.
  #
  # story - an object responding to +url+ and +archive_url+.
  #
  # Returns the "text" field of Diffbot's JSON response as a String, with
  # every run of newlines normalized to exactly one blank line so each line
  # becomes its own paragraph. Returns nil when no API key is configured,
  # when the story has no URL, when the URL ends in ".pdf", or when the
  # Diffbot request fails or comes back with an empty body. In the last case
  # +story.archive_url+ is also requested before returning.
  #
  # Request and parse errors are logged and swallowed, never raised.
  def self.get_story_text(story)
    return nil unless @@DIFFBOT_API_KEY

    url = story.url.to_s
    # Nothing to fetch for a story without a link; CGI.escape would also
    # raise on a nil URL.
    return nil if url.blank?

    # XXX: diffbot tries to read pdfs as text, so disable for now
    return nil if url.match(/\.pdf$/i)

    db_url = "#{DIFFBOT_API_URL}?token=#{@@DIFFBOT_API_KEY}&url=#{CGI.escape(url)}"

    begin
      s = Sponge.new
      s.timeout = FETCH_TIMEOUT
      res = s.fetch(db_url).body

      if res.present?
        j = JSON.parse(res)
        # turn newlines into double newlines, so they become paragraphs,
        # without ever leaving more than one blank line in a row
        return j["text"].to_s.gsub(/\n+/, "\n\n")
      end
    rescue => e
      # Log the story URL rather than db_url: db_url embeds the API token.
      Rails.logger.error "error fetching #{url}: #{e.message}"
    end

    # Diffbot gave us nothing usable. Request the story's archive URL; the
    # response is discarded (presumably this prompts the archive service to
    # capture the page -- confirm against Story#archive_url).
    begin
      s = Sponge.new
      s.timeout = FETCH_TIMEOUT
      archive_url = story.archive_url
      s.fetch(archive_url)
    rescue => e
      # archive_url stays nil here if story.archive_url itself raised.
      Rails.logger.error "error caching #{archive_url}: #{e.message}"
    end

    nil
  end
end