-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment-analysis.rb
211 lines (187 loc) · 5.89 KB
/
sentiment-analysis.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/usr/bin/env ruby
require 'fileutils'
require 'json'
require 'net/http'
require 'pp'

require 'rubygems'
require 'twitter'
require 'xlsx_writer'
require 'geocoder'
# Upper bound used by the rate-limit handling further down: a Twitter call that
# keeps raising Twitter::Error::TooManyRequests is retried while the attempt
# count is <= MAX_ATTEMPTS, then the error is re-raised.
MAX_ATTEMPTS = 10

# Wall-clock start, printed together with the elapsed time at the end of the run.
start = Time.now

# Workbook that receives one sheet per search query.
doc = XlsxWriter.new

# Twitter API credentials. Each value can be supplied through the environment
# so that secrets do not have to live in the source file.
#
# FIXME(security): the literal fallbacks below are real credentials committed
# to version control. They are kept only so existing invocations keep working;
# rotate them and delete the fallbacks once the environment variables are set
# wherever this script runs.
Twitter.configure do |config|
  config.consumer_key       = ENV.fetch('TWITTER_CONSUMER_KEY', 'aTvMTvc7IuShZ3KOyuQ')
  config.consumer_secret    = ENV.fetch('TWITTER_CONSUMER_SECRET', 'r2iHsycen9HpjncRFJh2hYfxS7ChaW51U1HUw6rM')
  config.oauth_token        = ENV.fetch('TWITTER_OAUTH_TOKEN', '6429542-SCj7hyOcjgei5rPRSoSiHm1DKWm9xNTlOyAMM8oxjq')
  config.oauth_token_secret = ENV.fetch('TWITTER_OAUTH_TOKEN_SECRET', '6vnKTWPKJX4KN27kHjUcGu5yV3opnzI1XtGlSsKBFg')
end
# District 2 – keywords – Jesse Jackson Jr. (jackson) vs Brian Woodworth (woodworth)
# District 8 – keywords – Joe Walsh (walsh) vs Tammy Duckworth (duckworth)
# District 10 – keywords – Robert Dold (dold) vs Brad Schneider (schneider)
# District 11 – keywords – Judy Biggert (biggert) vs Bill Foster (foster)
# Indiana Senate race – keywords – Joe Donnelly (donnelly) vs Richard Mourdock (mourdock, said with a long roll as if thunder is cracking when you say his name)
# Presidential election – keywords – Barack Obama (obama) vs Mitt Romney (romney)
# Other keywords: vote, voting, voted, ballot, election, electoral college, popular vote
# Twitter search queries, one per worksheet, in the order the sheets are
# created. Every query ends with the same two filters, which are appended here
# instead of being repeated in each literal.
# NOTE(review): multi-word names such as "bill foster" are not quoted, so they
# are presumably matched as separate terms rather than exact phrases — confirm
# that is intended.
shared_filters = '-filter:retweets -http'
searches = [
  'jesse jackson OR woodworth',
  'joe walsh OR duckworth',
  'dold OR schneider -maddy_schneider',
  'biggert OR bill foster',
  'donnelly OR mourdock',
  'obama OR romney',
  'vote OR voting OR voted OR ballot OR election OR electoral college OR popular vote'
].map { |terms| "#{terms} #{shared_filters}" }
# Number of tweets requested per search call (sent to Twitter as :count).
PAGE_SIZE = 100
# Maximum number of pages fetched per search, i.e. at most
# PAGE_SIZE * MAX_PAGES tweets per sheet.
MAX_PAGES = 5
# Value sent as Twitter's :geocode search filter ("lat,long,radius").
SEARCH_AREA = '41.743507,-88.011847,75mi'
# Worksheet title and :since_id for each entry of `searches`, in the same order.
# The IDs are the resume points recorded from the previous run.
SHEET_PLAN = [
  ["District 2",     265974916539621000 + 1],
  ["District 8",     265955382290182000 + 1],
  ["District 10",    265969765217104000 + 1],
  ["District 11",    265974422505136000 + 1],
  ["Indiana Senate", 265975208865829000 + 1],
  ["Prez",           265976815053578000 + 1],
  ["Voting",         265984784684167000 + 1]
].freeze

# Yields to the block and returns its value. When the block raises
# Twitter::Error::TooManyRequests, sleeps for the period reported by the error
# and runs the block again; once more than MAX_ATTEMPTS attempts have failed,
# the error is re-raised to the caller.
def with_rate_limit_retry
  attempts = 0
  begin
    attempts += 1
    yield
  rescue Twitter::Error::TooManyRequests => error
    pp "too many requests: #{attempts}"
    raise if attempts > MAX_ATTEMPTS
    # NOTE: Your process could go to sleep for up to 15 minutes but if you
    # retry any sooner, it will almost certainly fail with the same exception.
    sleep error.rate_limit.reset_in
    retry
  end
end

# Posts the id/text of every collected row to the Sentiment140 bulk classifier
# and returns a Hash of tweet ID => polarity taken from the response's "data"
# array. Returns an empty Hash (so the Polarity column stays blank) when there
# are no rows, the HTTP response is not a success, or the body is not JSON.
def fetch_polarities(rows)
  return {} if rows.empty?

  uri = URI.parse("http://www.sentiment140.com/api/bulkClassifyJson?appid=webmaster@wbez.org")
  req = Net::HTTP::Post.new(uri.request_uri, { 'Content-Type' => 'application/json' })
  req.body = { "data" => rows.map { |row| { "id" => row[:id], "text" => row[:text] } } }.to_json
  response = Net::HTTP.new(uri.host, uri.port).start { |http| http.request(req) }
  unless response.is_a?(Net::HTTPSuccess)
    pp "sentiment request failed: #{response.code} #{response.message}"
    return {}
  end

  polarities = {}
  # Array() tolerates a response that has no "data" key.
  Array(JSON.parse(response.body)["data"]).each do |entry|
    polarities[entry['id']] = entry['polarity']
  end
  polarities
rescue JSON::ParserError => e
  pp "sentiment response was not valid JSON: #{e.message}"
  {}
end

# For every search: create its worksheet, page backwards through the matching
# tweets, enrich each tweet with geocoding and author details, classify the
# batch, then write one spreadsheet row per tweet.
searches.each_with_index do |search, position|
  # fetch raises IndexError if a search is added without a SHEET_PLAN entry.
  sheet_name, since_id = SHEET_PLAN.fetch(position)
  sheet = doc.add_sheet(sheet_name)
  sheet.freeze_top_left = 'A2'
  sheet.add_row([
    "Tweet ID",
    "User",
    "Tweet",
    "Created At",
    "Followers",
    "Location",
    "Coordinates",
    "Address",
    "Polarity"
  ])

  rows = []
  max_id = 0       # ID of the last tweet seen on the previous page (0 = none yet)
  newest_id = nil  # first ID returned for this search

  (1..MAX_PAGES).each do |set|
    pp "set: #{set}, rows = #{rows.size}, since_id = #{since_id}, max_id = #{max_id}, search = #{search}"

    # since_id is sent on every page so pagination never walks past the resume
    # point into tweets collected by an earlier run.
    query = { :count => PAGE_SIZE, :geocode => SEARCH_AREA, :result_type => "recent", :since_id => since_id }
    # Twitter's max_id is inclusive; subtract 1 so the last tweet of the
    # previous page is not returned (and written) a second time.
    query[:max_id] = max_id - 1 if max_id > 0

    results = Array(with_rate_limit_retry { Twitter.search(search, query).results })

    results.each_with_index do |status, offset|
      pp "#{offset + 1} | #{results.size}"
      begin
        id = status.id
        max_id = id
        # presumably the newest tweet, since results are requested as "recent"
        newest_id ||= id
        text = status.text
        tweeter = status.from_user

        coordinates = ""
        address = ""
        lat = long = nil
        geo = status.geo
        if geo
          lat = geo.latitude
          long = geo.longitude
          address = Geocoder.address([lat, long])
          coordinates = "#{lat}, #{long}"
        end
        created_at = status.created_at.to_s

        # Only the Twitter call is retried on rate limiting, so a retry does
        # not repeat the geocoding request above.
        pp "user search"
        user = with_rate_limit_retry { Twitter.user_search(tweeter).first }
        pp "end user search"
        followers = user && user.followers_count
        location = user && user.location

        pp "#{text} | #{tweeter} | #{id} | #{location} | #{lat},#{long} | #{address} | #{created_at} | #{followers} | #{created_at}"
        rows.push(
          :id => id,
          :tweeter => tweeter,
          :text => text,
          :created_at => created_at,
          :followers => followers,
          :location => location,
          :coordinates => coordinates,
          :address => address
        )
      rescue Twitter::Error::TooManyRequests
        # Retries are exhausted: abort instead of stalling on every later tweet.
        raise
      rescue StandardError => e
        # Best effort per tweet: report what went wrong and move on.
        pp "other error (#{e.class}): #{e.message}"
      end
    end

    pp "set: #{set}, fetched = #{results.size}, since_id = #{since_id}, max_id = #{max_id}, search = #{search}"
    # A short (or empty) page means there is nothing older left to fetch.
    break if results.size < PAGE_SIZE
  end

  pp "newest tweet id for #{sheet_name}: #{newest_id}" if newest_id

  sentiment_data = fetch_polarities(rows)
  rows.each do |row|
    sheet.add_row([
      # NOTE(review): typed as BigDecimal presumably so the 18-digit ID is not
      # rounded by the spreadsheet — confirm against xlsx_writer.
      { :type => :BigDecimal, :value => row[:id] },
      row[:tweeter],
      row[:text],
      row[:created_at],
      row[:followers],
      row[:location],
      row[:coordinates],
      row[:address],
      sentiment_data[row[:id]]
    ])
  end
end
# Move the generated workbook from the path XlsxWriter reports to
# 'sentiment-9.xlsx' (relative to the current working directory), then let the
# writer clean up after itself (presumably its temporary files).
FileUtils.mv(doc.path, 'sentiment-9.xlsx')
doc.cleanup

# Print the start time, the finish time and the elapsed seconds, in that order.
finished = Time.now
[start, finished, finished - start].each { |value| pp value }