-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsql_statements.py
81 lines (74 loc) · 1.88 KB
/
sql_statements.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
CREATE_TRIPS_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS trips (
trip_id INTEGER NOT NULL,
start_time TIMESTAMP NOT NULL,
end_time TIMESTAMP NOT NULL,
bikeid INTEGER NOT NULL,
tripduration DECIMAL(16,2) NOT NULL,
from_station_id INTEGER NOT NULL,
from_station_name VARCHAR(100) NOT NULL,
to_station_id INTEGER NOT NULL,
to_station_name VARCHAR(100) NOT NULL,
usertype VARCHAR(20),
gender VARCHAR(6),
birthyear INTEGER,
PRIMARY KEY(trip_id))
DISTSTYLE ALL;
"""
CREATE_STATIONS_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS stations (
id INTEGER NOT NULL,
name VARCHAR(250) NOT NULL,
city VARCHAR(100) NOT NULL,
latitude DECIMAL(9, 6) NOT NULL,
longitude DECIMAL(9, 6) NOT NULL,
dpcapacity INTEGER NOT NULL,
online_date TIMESTAMP NOT NULL,
PRIMARY KEY(id))
DISTSTYLE ALL;
"""
COPY_SQL = """
COPY {}
FROM '{}'
ACCESS_KEY_ID '{{}}'
SECRET_ACCESS_KEY '{{}}'
IGNOREHEADER 1
DELIMITER ','
"""
COPY_MONTHLY_TRIPS_SQL = COPY_SQL.format(
"trips",
"s3://udacity-dend/data-pipelines/divvy/partitioned/{year}/{month}/divvy_trips.csv"
)
COPY_ALL_TRIPS_SQL = COPY_SQL.format(
"trips",
"s3://udacity-dend/data-pipelines/divvy/unpartitioned/divvy_trips_2018.csv"
)
COPY_STATIONS_SQL = COPY_SQL.format(
"stations",
"s3://udacity-dend/data-pipelines/divvy/unpartitioned/divvy_stations_2017.csv"
)
LOCATION_TRAFFIC_SQL = """
BEGIN;
DROP TABLE IF EXISTS station_traffic;
CREATE TABLE station_traffic AS
SELECT
DISTINCT(t.from_station_id) AS station_id,
t.from_station_name AS station_name,
num_departures,
num_arrivals
FROM trips t
JOIN (
SELECT
from_station_id,
COUNT(from_station_id) AS num_departures
FROM trips
GROUP BY from_station_id
) AS fs ON t.from_station_id = fs.from_station_id
JOIN (
SELECT
to_station_id,
COUNT(to_station_id) AS num_arrivals
FROM trips
GROUP BY to_station_id
) AS ts ON t.from_station_id = ts.to_station_id
"""