Skip to content

Commit

Permalink
update code to send to PSQL instead of csv file
Browse files Browse the repository at this point in the history
  • Loading branch information
oshadmon committed May 23, 2018
1 parent 7c52b90 commit 5c6df3d
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 3 deletions.
37 changes: 37 additions & 0 deletions gts/create_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-- These tables are an alternative to the CSV files being created

DROP TABLE IF EXISTS repo_overview;
DROP TABLE IF EXISTS repo_visitors;
DROP TABLE IF EXISTS repo_clones;
DROP TABLE IF EXISTS repo_referrals;

CREATE TABLE repo_overview(
create_timestamp DATE NOT NULL DEFAULT DATE(NOW()), -- date of generated results
Repo_Name VARCHAR(255) NOT NULL DEFAULT '',
Result_Type VARCHAR(255) NOT NULL DEFAULT '',
Uniques INT NOT NULL DEFAULT 0,
Total INT NOT NULL DEFAULT 0
);

CREATE TABLE repo_visitors(
Repo_Name VARCHAR(255) NOT NULL DEFAULT '',
create_timestamp DATE NOT NULL DEFAULT DATE(NOW()),
Uniques INT NOT NULL DEFAULT 0,
Total INT NOT NULL DEFAULT 0
);


CREATE table repo_clones(
Repo_Name VARCHAR(255) NOT NULL DEFAULT '',
create_timestamp DATE NOT NULL DEFAULT DATE(NOW()),
Uniques INT NOT NULL DEFAULT 0,
Total INT NOT NULL DEFAULT 0
);

CREATE TABLE repo_referrals(
Repo_Name VARCHAR(255) NOT NULL DEFAULT '',
Referral VARCHAR(255) NOT NULL DEFAULT '',
Uniques INT NOT NULL DEFAULT 0,
Total INT NOT NULL DEFAULT 0
);

97 changes: 94 additions & 3 deletions gts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import datetime
import getpass
import requests

import psycopg2

# Globals
current_timestamp = str(datetime.datetime.now().strftime('%Y-%m-%d-%Hh-%Mm')) # was .strftime('%Y-%m-%d'))
Expand Down Expand Up @@ -230,18 +230,91 @@ def store_csv(file_path, repo, json_response, response_type):
csv_writer.writerow(row)


def store_db(db_config={}, repo='', json_response='', response_type=''):
""" Store data for a given response into a corresponding table (described in create_table.sql):
repo_name, date, views, unique_visitors/cloners
:param db_config: dict - dictionary containing configuration information for database
:param repo: str - the GitHub repository name
:param json_response: json - the json input
:param response_type: str - 'views', 'clones', ''
"""

# Connect to database
conn = psycopg2.connect(host=db_config['host'], port=db_config['port'], user=db_config['user'], password=db_config['password'], dbname=db_config['dbname'])
conn.autocommit = True
cur = conn.cursor()


insert_repo_overview = "INSERT INTO repo_overview(Repo_Name, Result_Type, Uniques, Total) VALUES ('%s', '%s', %s, %s);"
if response_type == 'views': # send data to `repo_overview` and `repo_visitors`
insert_repo_visitors = "INSERT INTO repo_visitors(Repo_Name, create_timestamp, Uniques, Total) VALUES %s;"
cur.execute(insert_repo_overview % (repo, response_type, json_response['uniques'], json_response['count']))
cur.execute(insert_repo_visitors % __insert_data_none_reference(repo, json_response[response_type]))
elif response_type == 'clones': # send data to `repo_overview` and `repo_clones`
insert_repo_visitors = "INSERT INTO repo_clones(Repo_Name, create_timestamp, Uniques, Total) VALUES %s;"
cur.execute(insert_repo_overview % (repo, response_type, json_response['uniques'], json_response['count']))
cur.execute(insert_repo_visitors % __insert_data_none_reference(repo, json_response[response_type]))
else: # send data to `repo_referrals`
__insert_repo_referrals(cur, repo, json_response)

def __insert_data_none_reference(repo, json_response=None)->str:
""" Based on repo and info in json_response, generate the rows to be inserted into table:
repo_name, date, views, unique_visitors/cloners
:param repo: str - the GitHub repository name
:param json_response: json - the json input
:return return_stmt: str - string containing the rows which will be inserted
"""

return_stmt = ""
i = 0
for obj in json_response:
if i == len(json_response)-1:
return_stmt += "\n\t('%s', '%s', %s, %s);" % (repo, obj['timestamp'], obj['uniques'], obj['count'])
else:
return_stmt += "\n\t('%s', '%s', %s, %s)," % (repo, obj['timestamp'], obj['uniques'], obj['count'])
i += 1
return return_stmt

def __insert_repo_referrals(cur=None, repo='', json_response=None):
""" Based on repo and info in json_response, generate the rows to be inserted into `repo_referrals`:
repo_name, views, unique_visitors/cloners
:param cur: psql - Connection to the PSQL in order to execute queries
:param repo: str - the GitHub repository name
:param json_response: json - the json input
"""

insert_stmt = "INSERT INTO repo_referrals(Repo_Name, Referral, Uniques, Total) VALUES('%s', '%s', %s, %s)"
update_stmt = "UPDATE repo_referrals SET Uniques=%s, Total=%s WHERE Repo_Name='%s' AND Referral='%s'"
check_stmt = "SELECT COUNT(*) FROM repo_referrals WHERE Repo_Name='%s' AND Referral='%s'"
for obj in json_response:
# Check whether or not a given referral exists
check_count = check_stmt % (repo, obj['referrer'])
cur.execute(check_count)
count = cur.fetchall()[0][0]
if count == 0: # If referral doesn't exists create a new one
insert = insert_stmt % (repo, obj['referrer'], obj['uniques'], obj['count'])
else: # If referral exists update row
insert = update_stmt % (obj['uniques'], obj['count'], repo, obj['referrer'])
cur.execute(insert)

def main():
parser = argparse.ArgumentParser()
parser.add_argument('username', help='Github username')
parser.add_argument('repo', help='User\'s repo', default='ALL', nargs='?')
parser.add_argument('save_csv', default='save_csv', help='Set to "no_csv" if no CSV should be saved', nargs='?')
parser.add_argument('save_csv', default='save_csv', help='Set to "no_csv" if no CSV should be saved, or "set_db" if data should be saved in database', nargs='?')
parser.add_argument('-hp', '--host', default='127.0.0.1:5432', help='Set database host and port [127.0.0.1:5432]', nargs='?')
parser.add_argument('-usr', '--db-user', default='root:""', help='Set database user and password [root:""]', nargs='?')
parser.add_argument('-name', '--db-name', default='test', help='Set database where data will be stored', nargs='?')
parser.add_argument('-o', '--organization', default=None, help='Github organization')
args = parser.parse_args()
""" Run main code logic
:param username: string - GitHub username, or username:password pair
:param repo: string - GitHub user's repo name or by default 'ALL' repos
:param save_csv: string - Specify if CSV log should be saved
:optional:
param -hp, --host: string - Host and port to the database
param -usr, --db-user: string - user and password to the database
param -name, --db-name: string - database name
param -o, --organization: string - GitHub organization (if different from username)
"""

Expand All @@ -263,6 +336,13 @@ def main():
auth_pair = (username, pw)
# traffic_headers = {'Accept': 'application/vnd.github.spiderman-preview'}

# database config info
db_config = {'host': args.host.strip().split(":")[0],
'port': int(args.host.strip().split(":")[1]),
'user': args.db_user.strip().split(":")[0],
'password': args.db_user.strip().split(":")[1],
'dbname': args.db_name.strip()
}
if repo == 'ALL':
# By default iterate over all repositories
repos = []
Expand Down Expand Up @@ -300,7 +380,13 @@ def main():
store_csv(csv_file_name, repo, traffic_response, 'views')
store_csv(csv_file_name_clones, repo, clones_response, 'clones')
store_csv_referrers(csv_file_name_referrers, repo, referrers_response)
else:
if args.save_csv.strip() == 'set_db':
store_db(db_config, repo, traffic_response, 'views')
store_db(db_config, repo, clones_response, 'clones')
store_dbs(db_configs, repo, referrers_response)


else:
# Or just request 1 repo
traffic_response = send_request('traffic', organization, auth_pair, repo).json()
# Error handling in case of {'documentation_url': 'https://developer.github.com/v3', 'message': 'Not Found'}
Expand All @@ -317,6 +403,11 @@ def main():
store_csv(csv_file_name, repo, traffic_response, 'views')
store_csv(csv_file_name_clones, repo, clones_response, 'clones')
store_csv_referrers(csv_file_name_referrers, repo, referrers_response)
if args.save_csv.strip() == 'set_db':
store_db(db_config, repo, traffic_response, 'views')
store_db(db_config, repo, clones_response, 'clones')
store_db(db_config, repo, referrers_response)



if __name__ == '__main__':
Expand Down

0 comments on commit 5c6df3d

Please sign in to comment.