-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
425 lines (382 loc) · 22 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Uber Project</title>
<!-- Libraries and Such-->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
<script src="libs/d3/d3.min.js" charset="utf-8"></script>
<script src="libs/bootstrap/js/bootstrap.min.js" charset="utf-8"></script>
<script src="libs/plotly-latest.min.js" charset="utf-8"></script>
<script src="https://d3js.org/queue.v1.min.js"></script>
<script src="https://maps.googleapis.com/maps/api/js"></script>
<!-- Stylesheets-->
<link rel="stylesheet" href="//code.jquery.com/ui/1.11.4/themes/smoothness/jquery-ui.css">
<link rel="stylesheet" type="text/css" href="libs/bootstrap/css/bootstrap.min.css">
<link rel="stylesheet" type="text/css" href="css/myStyle.css">
<!-- Get some nice font-->
<link href='https://fonts.googleapis.com/css?family=PT+Sans:400,700' rel='stylesheet' type='text/css'>
<!-- External js files -->
<script src="js/surgeEstimator.js"></script>
<script src="js/mapvis.js"></script>
<script src="//code.jquery.com/jquery-1.10.2.js"></script>
<script src="//code.jquery.com/ui/1.11.4/jquery-ui.js"></script>
<script>
// Once the DOM is ready, turn the #datepicker text input into a
// jQuery UI date picker using the widget's default options.
jQuery(document).ready(function () {
  var dateField = jQuery("#datepicker");
  dateField.datepicker();
});
</script>
</head>
<body>
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container-fluid">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
</div>
<div class="navbar-header">
<img src="img/usurge.png" alt="uSurge Logo" width="120"/>
</div>
<div class="collapse navbar-collapse">
<ul class="nav navbar-nav navbar-right">
<li class="navList">I plan to ride</li>
<li>
<form id="rideSelect">
<select id="rideType" class="estimatorForm">
<option value="uberX">UberX</option>
<option value="uberXL">UberXL</option>
<option value="UberBlack">UberBlack</option>
<option value="UberSUV">UberSUV</option>
<option value="uberTAXI">UberTAXI</option>
<option value="uberSELECT">UberSELECT</option>
</select>
</form>
</li>
<li class="navList">on</li>
<li>
<form id="dateSelect">
<input type="text" id="datepicker" class="estimatorForm">
</form>
</li>
<li class="navList">at</li>
<li>
<form id="timeSelect">
<input type="time" name="usr_time" id="timepicker" class="estimatorForm">
</form>
</li>
<li class="navList">in</li>
<li>
<form id="citySelect">
<select name="city" id="nowCity" class="estimatorForm">
<option value="Atlanta">Atlanta</option>
<option value="Boston">Boston</option>
<option value="New York">New York</option>
<option value="San Francisco">San Francisco</option>
<option value="Washington DC">Washington DC</option>
</select>
</form>
</li>
<li class="navList">
<button type="button" id="submitChanges" onclick="submitChanges()">
<p id="submitChangesText"> What's my surge? </p>
</button>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</div><!-- /.container-fluid -->
</nav>
<div class="row row-height"></div>
<div class="row row-height"></div>
<div class="row" id="topRow">
<div class="col-md-6">
<div class="row">
<div class="col-md-8 sectionTab">
<div class="col-md-4"></div>
<div class="col-md-8">
<h1><b id="selectedCity">Select a city</b></h1>
</div>
</div>
<div class="col-md-4"></div>
</div>
<div class="row">
<div class="col-md-3">
<h3 class="surgeDisplay1" id="selectedDate"></h3>
</div>
<div class="col-md-3">
<h3 class="surgeDisplay1" id="selectedTime"></h3>
</div>
<div class="col-md-3">
<h3 class="surgeDisplay1" id="selectedWeather"></h3>
</div>
<div class="col-md-3">
<h3 class="surgeDisplay1" id="selectedTemp"></h3>
</div>
</div>
<div class="row" id="surgeProb">
<div class="col-md-3 surgeTitle">
<h2>No Surge</h2>
<h3 class="surgeDisplay" id="noSurge"></h3>
</div>
<div class="col-md-3 surgeTitle">
<h2>Low Surge</h2>
<h3 class="surgeDisplay" id="lowSurge"></h3>
</div>
<div class="col-md-3 surgeTitle">
<h2>Mid Surge</h2>
<h3 class="surgeDisplay" id="midSurge"></h3>
</div>
<div class="col-md-3 surgeTitle">
<h2>High Surge</h2>
<h3 class="surgeDisplay" id="highSurge"></h3>
</div>
</div>
</div>
<div class="col-md-6" id="mapVis">
<div id="map"></div>
</div>
</div>
<div class="row-height"></div>
<!--div class="row row-border">
<div class="col-md-4">
</div>
<div class="col-md-8">
<div class="col-md-8 sectionText">dsds</div>
<div class="col-md-4"></div>
</div>
</div-->
<div class="row">
<div class="col-md-4">
<div class="row-border"></div>
<div class="row sectionTab">
<h1><b>Surges Explained</b></h1>
</div>
</div>
<div class="col-md-8 sectionText">
<div class="col-md-10 col-md-offset-1">
<h3><b>No surge:</b> Looks like it's your lucky day!</h3>
<h3><b>Mid surge:</b> 1.5x</h3>
<h3><b>Low surge:</b> Under 1.5x</h3>
<h3><b>High surge:</b> Over 1.5x</h3>
</div>
</div>
<div class="col-md-1"></div>
</div>
<div class="row-height"></div>
<div class="row">
<div class="col-md-4 col-md-push-8">
<div class="row-border"></div>
<div class="row sectionTab">
<h1><b>What we did</b></h1>
</div>
</div>
<div class="col-md-8 col-md-pull-4 sectionText">
<div class="col-md-10 col-md-offset-1"><br>
<p>First, we collected the data using Uber's new API on an AWS machine. Second, we used exploratory data analysis to glean more insights into the data set. Much of the exploratory data analysis proved our intuition correct. For example, the colder the temperature, the less likely people are to walk and the more likely there will be a surge price on Ubers. Finally, we trained a Random Forest Classifier on our dataset to have a comprehensive model that can be used to glean further insights into the data and create a very reliable prediction of the surge price (over 96% accuracy!). After creating the model, we wanted to look at Boston specifically. We turned our attention to two of the most important features we found in our model: temperature and car type. We confirmed our intuition about temperature using the model, then showed that it is also possible to show consumer behavior in Boston with respect to service type.</p><br>
</div>
</div>
</div>
<div class="row-height"></div>
<div class="row">
<div class="col-md-4">
<div class="row-border"></div>
<div class="row sectionTab">
<h1><b>What got us going</b></h1>
</div>
</div>
<div class="col-md-8 sectionText">
<div class="col-md-10 col-md-offset-1"><br>
<p>We wanted to explore Uber’s concept of surge pricing by conducting an analysis of which factors affect surge pricing the most. This was particularly interesting to us because Uber’s API was just recently made public, so there have not been many predictive/analytical projects looking at surge pricing. This gives us a novel opportunity to explore data that affects our daily lives, and to incorporate several of the statistical and computational methodologies that we learned this semester. The data set that we have compiled also allows us to look at many different variables and their effect on surge pricing. For example, we will look at how temperature and weather affect the surge charge on an Uber. Also, should someone call an Uber now or, if they have time, wait an hour or two to try to find a better price? We all use Uber, and love the service; however, we wanted to create a predictive model in order to find the times when we do not have to be charged extra money!</p><br>
</div>
</div>
<div class="col-md-1"></div>
</div>
<div class="row-height"></div>
<div class="row">
<div class="col-md-4 col-md-push-8">
<div class="row-border"></div>
<div class="row sectionTab">
<h1><b>Uber API</b></h1>
</div>
</div>
<div class="col-md-8 col-md-pull-4 sectionText">
<div class="col-md-10 col-md-offset-1"><br>
<p>The Uber API was made available to the general public earlier this year. While the company's main motivation for opening up the API was to allow technology developers to embed Uber into their apps, it also allowed users like us to amass and analyze ride data. While Uber did not allow us to access past historical data, once we knew we wanted to collect data we were able to use AWS to make calls every 5 minutes to the Uber API and receive back information as if we were a user requesting a ride at that moment in time. We did this over 3 weeks in 5 different cities.</p><br>
</div>
</div>
</div>
<div class="row-height"></div>
<div class="row">
<div class="col-md-4">
<div class="row-border"></div>
<div class="row sectionTab">
<h1><b>The Data</b></h1>
</div>
</div>
<div class="col-md-8 sectionText">
<div class="col-md-10 col-md-offset-1"><br>
<p> The dataset we collected over 3 weeks consists of information pulled from an API call every 5 minutes to Uber's <a href="https://developer.uber.com/docs/v1-estimates-price" target="_blank" rel="noopener noreferrer">Price Estimates</a> endpoint, passing in parameters for the location we wanted to request a ride from, as well as an API call to WeatherUnderground's API to receive temperature and weather data. For each call we received the unique ride ID, time/date, type of Uber, surge, temperature, weather description, state, and income of the area. </p><br>
</div>
</div>
</div>
<div class="row-height"></div>
<div class="row">
<div class="col-md-4 col-md-push-8">
<div class="row-border"></div>
<div class="row sectionTab">
<h1><b>What we found</b></h1>
</div>
</div>
<div class="col-md-8 col-md-pull-4 sectionText">
<div class="col-md-10 col-md-offset-1"><br>
<h3>Exploratory Analysis</h3>
<p>We found that most rides do not have a surge: only about 4% of the time will an Uber ride contain a surge charge. Average surge overall was about 1.024x the base price. The graphs we found to be most interesting were those showing the effect of city on surge, the effect of temperature on surge, the effect of time of day on surge, and the effect of weather type on surge.</p>
<h3>City</h3>
<iframe src="https://plot.ly/~abigailorlando/67/effect-of-city-on-surge/" title="Effect of city on surge" width="100%" height="500" seamless="seamless" scrolling="no"></iframe>
<p> Of all the cities that we looked at, Boston and Washington DC have the highest overall surge and New York has the lowest. New York having the lowest surge intuitively makes sense, as New York is an Uber hub and has lots of alternative modes of transportation, which limits excess demand. Boston and DC have less developed public transportation systems, so the demand for Uber is higher. </p>
<br>
<h3>Temperature</h3>
<iframe src="https://plot.ly/~abigailorlando/53/effect-of-temperature-on-surge/" title="Effect of temperature on surge" width="100%" height="500" seamless="seamless" scrolling="no"></iframe>
<p> We see that with extremely cold temperatures there is a higher surge because of higher demand for Uber with not as much supply. We see a similar effect but not as strong for extremely hot temperatures. </p>
<br>
<h3>Weather</h3>
<iframe src="https://plot.ly/~abigailorlando/49/effect-of-weather-on-surge/" title="Effect of weather on surge" width="100%" height="500" seamless="seamless" scrolling="no"></iframe>
<p> Weather conditions are clear most of the time, but when there is rain or there is an indication of rain with cloudiness, surge pricing goes up as people do not want to travel in adverse weather conditions. </p>
<br>
<h3>Time of Day</h3>
<iframe src="https://plot.ly/~abigailorlando/71/effect-of-time-on-surge/" title="Effect of time of day on surge" width="100%" height="500" seamless="seamless" scrolling="no"></iframe>
<p> Overall, the highest average surge is at 5 and 6 PM, followed by 8 AM. Since these are rush hour times, it makes sense that there would be high demand for Ubers at these times, and not enough supply. The lowest average surge occurs at 10 AM and 8 PM, which also makes sense because there is generally a low demand for transportation at these times of the day. </p>
<br>
<h3>Model Summary</h3>
<p>In order to create a predictive model, we decided to use a Random Forest Classifier. Using a Random Forest Classifier makes sense for three reasons. First, we are using a dataset with unbalanced classes, in that there are many more cases where there is "No Surge" than the other three classes. A Random Forest works well with unbalanced classes. Second, we are using a lot of data in the training of the model. Random Forests sample from the data many times, so the prediction will be optimal if there is a lot of data to be used. Finally, we are able to look at feature importance in the data, which is particularly enlightening on this dataset. Thus we are able to see what features have the biggest effect on the pricing prediction.</p>
<p>We split the dataset into a test and training set first in order to test for overfitting of the model. We then had to choose the hyperparameters for the model. We used a 5-fold cross-validation technique in order to tune the hyperparameters of the model. After fully training the model, we found that the model predicted the correct class 96.3% of the time on the test set and 98% of the time on the training set. This small difference shows that the model was not overfitted and we properly trained the model.</p>
<br>
<h3>Final Analysis</h3>
<p>For the final analysis of the model, we decided to look at two aspects of the model. First, we look at the feature importance found in the model, sorted by the features that affect the model's prediction the most. Second, we restrict the model to the Boston area to look at how the city-specific surge prices are affected by some of the most important variables found in the model: temperature and car type.</p>
<br>
<h3>Feature Importance</h3>
<img src="img/feature_importance.png" alt="Top 10 most predictive features in the model" width="100%"/>
<p>The graph shows the top 10 most predictive features in the model. As you can see, temperature is by far the most predictive variable that we pass into the model. The next variable that seems to affect the prediction is the type of car. This is interesting because our car type feature serves as a proxy for the supply and demand dynamics of each Uber service.</p>
<br>
<h3>Temperature's Effect on Boston Ubers</h3>
<img src="img/temperature_model.png" alt="Probability of a surge versus temperature for Boston Ubers" width="100%"/>
<p>We created dummy data in order to create this plot. We fixed all other variables and altered only the temperature variable to see how the probability of a surge changes. As you can see, as temperature increases so does the probability of there being no Uber surge. Interestingly enough, if you want to get a cheap Uber, then use the service when it is hot outside! The model predicts that the colder the temperature is, the more likely there is to be a surge multiple.</p>
<br>
<h3>Car Type Effect on Boston Ubers</h3>
<img src="img/cartype_model.png" alt="Probability of a surge by car type for Boston Ubers" width="100%"/>
<p>The graph shows an interesting proxy into consumer usage of Uber's services in Boston. We interpret the graph as the higher the probability of a surge, the more demand there is for that particular service, which in turn leads to higher surges. uberX and uberXL are by far the most demanded services in the Boston area. uberBLACK seems to be the least demanded service. The graph also shows the widespread application of the model and the incredible amount of analysis that can be done to glean insights into Uber's business!</p>
<br>
</div>
</div>
</div>
<div class="row-height"><hr></div>
<div class="row" id="drivers">
<div class="col-md-4">
<div class="row-border"></div>
<div class="row sectionTab">
<h1><b>Meet the drivers</b></h1>
</div>
</div>
<div class="col-md-8">
<div class="col-md-1"></div>
<div class="col-md-2 driverInfo">
<div class="row">
<img src="img/Trabes.png" alt="Peter Traber"/>
</div>
<div class="row">
<b>
<h4 class="riderName">Peter Traber</h4>
Applied Math & Government
</b>
</div>
</div>
<div class="col-md-2 driverInfo">
<div class="row">
<img src="img/Abbie.png" alt="Abbie Orlando"/>
</div>
<div class="row">
<b>
<h4 class="riderName">Abbie Orlando</h4>
Neurobiology & Computer Science
</b>
</div>
</div>
<div class="col-md-2 driverInfo">
<div class="row">
<img src="img/Summer.png" alt="Summer Carter"/>
</div>
<div class="row">
<b>
<h4 class="riderName">Summer Carter</h4>
Computer Science & Economics
</b>
</div>
</div>
<div class="col-md-2 driverInfo">
<div class="row">
<img src="img/Dill.png" alt="Dillon Tiner"/>
</div>
<div class="row">
<b>
<h4 class="riderName">Dillon Tiner</h4>
Statistics & History of Art and Architecture
</b>
</div>
</div>
<div class="col-md-2 driverInfo">
<div class="row">
<a href="http://procatinator.com/" target="_blank" rel="noopener noreferrer"><img src="img/Scratchy.png" alt="Scratchy (opens procatinator.com in a new tab)"/></a>
</div>
<div class="row">
<b>
<h4 class="riderName">Scratchy</h4>
Astrophysics & Folklore and Mythology (Joint)
</b>
</div>
</div>
</div>
</div>
<div class="row-height"></div>
<!-- -->
<!-- -->
<!-- AFTER ALL HTML IS LOADED -->
<!-- -->
<!-- -->
<script>
/* Location Finder -- NECESSARY?? */

// Element that receives geolocation status messages.
// NOTE(review): this page's markup contains no element with the id
// "suggestCityText", so this lookup yields null unless one is added.
var x = document.getElementById("suggestCityText");

/**
 * Show a status message in the #suggestCityText element, if it exists.
 * Does nothing when the element is missing, so the geolocation callbacks
 * below never throw on a page that lacks the status element.
 *
 * @param {string} message Text to display.
 */
function setLocationStatus(message) {
  // Fall back to a fresh lookup in case the element was added after this
  // script first ran.
  var target = x || document.getElementById("suggestCityText");
  if (target) {
    target.innerHTML = message;
  }
}

/**
 * Ask the browser for the user's current position.
 * Success is routed to showPosition and failure to showError; when the
 * Geolocation API is unavailable a status message is shown instead.
 */
function getLocation() {
  if (navigator.geolocation) {
    navigator.geolocation.getCurrentPosition(showPosition, showError);
  } else {
    setLocationStatus("Geolocation is not supported by this browser.");
  }
}

/**
 * Success callback for getCurrentPosition: acknowledges the lookup.
 *
 * @param {GeolocationPosition} position Supplied by the browser; not read here.
 */
function showPosition(position) {
  setLocationStatus("Thanks!");
}

/**
 * Failure callback for getCurrentPosition.
 * A denied permission gets its own message; every other failure
 * (unavailable position, timeout, or any unrecognised code) shows "Thanks!".
 *
 * @param {GeolocationPositionError} error Error supplied by the browser.
 */
function showError(error) {
  switch (error.code) {
    case error.PERMISSION_DENIED:
      setLocationStatus("Alrighty then!");
      break;
    case error.POSITION_UNAVAILABLE:
    case error.TIMEOUT:
    default:
      // GeolocationPositionError defines no UNKNOWN_ERROR constant, so the
      // catch-all is expressed as the default branch.
      setLocationStatus("Thanks!");
      break;
  }
}
</script>
<footer>
</footer>
</body>
</html>