Skip to content

Commit

Permalink
Created epic algo
Browse files Browse the repository at this point in the history
  • Loading branch information
sammce committed Feb 11, 2021
1 parent bf666c0 commit e2777ab
Show file tree
Hide file tree
Showing 10 changed files with 572,411 additions and 612,315 deletions.
Binary file modified __pycache__/clean.cpython-38.pyc
Binary file not shown.
Binary file modified __pycache__/formatter.cpython-38.pyc
Binary file not shown.
Binary file modified __pycache__/process.cpython-38.pyc
Binary file not shown.
98 changes: 24 additions & 74 deletions clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ class CleanedData(Formatter):
# Create dictionary to house cleaned DataFrames
frames = {}

def __init__(self):
def __init__(self, new=False):
'''
Reads in data from specific excel files and stores it in
various variables.
Expand Down Expand Up @@ -34,35 +34,9 @@ def __init__(self):
# used as row and column identifiers respectively
self.years = self.np.array([], dtype=int)
self.places = self.np.array(self.new_house_data.iloc[0][1:8].tolist())
# END OF AVERAGE PRICES PARSING


# START OF RAW PRICES PARSING
self.raw_data = self.pd.read_csv("with_outliers.csv")
self.cleaned_data = self.pd.read_csv("cleaned.csv")

outlier_dict = {
'min': {
'Dublin': 150000,
'Cork': 125000,
'Galway': 135000,
'Limerick': 110000,
'Waterford': 100000,
'Other Areas': 80000
},
'max': {
'Dublin': 570000,
'Cork': 500000,
'Galway': 520000,
'Limerick': 440000,
'Waterford': 460000,
'Other Areas': 400000,
},
'new': 8,
}
self.places_no_national = self.np.array(self.new_house_data.iloc[0][2:8].tolist())

self.clean_averages()
# self.clean_raw_prices(outlier_dict)

def clean_averages(self):
# Clean new listings excel sheet
Expand Down Expand Up @@ -99,45 +73,6 @@ def clean_averages(self):
break

self.old_avg = self.pd.DataFrame.from_dict(data, orient='index', columns=self.places)

def clean_raw_prices(self, outlier_dict):
# self.raw_final_new = []

self.cleaned_prices = self.cleaned_prices.drop(columns=["Unnamed: 0.1"])
# self.raw_price = self.raw_price.rename(columns={
# "Date of Sale (dd/mm/yyyy)":"Year",
# "Price (€)":"Price",
# "Description of Property":"Description",
# })

for index, row in self.raw_data.iterrows():
# removes first character (€), any separating commas and turns to int
price = row['Price']
place = row['County']

# if price < outlier_dict['min'][place] or price > outlier_dict['max'][place]:
# self.raw_price = self.raw_price.drop([index])
# row_place = self.raw_price.loc[index, 'County']
# row_desc = self.raw_price.loc[index, 'Description']

# if not row_place in self.places:
# self.raw_price.loc[index, 'County'] = "Other Areas"
# row_place = "Other Areas"

# self.raw_price.loc[index, 'Year'] = int(row['Year'].split('/')[-1])

# if 'New' in row_desc:
# self.raw_price.loc[index, 'Description'] = "New"
# row_price = round((row_price / 100) * outlier_dict['new'])
# else:
# self.raw_price.loc[index, 'Description'] = "Second"

# # outlier dictionary is passed where min and max values are defined
if price < outlier_dict['min'][place] or price > outlier_dict['max'][place]:
self.cleaned_prices = self.cleaned_prices.drop([index])

self.cleaned_prices.to_csv("cleaned.csv", index=False)
print("Success!")

def get_every_nyears(self, df, n=1):
'''
Expand Down Expand Up @@ -184,12 +119,27 @@ def search(self, df, location, year):
Returns an int, or prints an error message if no results are found.
'''
return df.loc[int(year), location.title()]






if __name__=='__main__':
cleaned = CleanedData()
dublin_houses = cleaned.cleaned_data.loc[cleaned.cleaned_data['County'] == 'Dublin']
houses_2015 = dublin_houses.loc[dublin_houses['Year'] == 2015]
mean_2015 = houses_2015['Price'].mean()
print(mean_2015)
# print(cleaned.raw_price)
# print(cleaned.search('2004', 'dublin'))
import plotly.express as px
# cleaned = CleanedData(new=True)

# data = []
# for place in cleaned.places:
# if place == 'National':
# continue

# new_df = df[
# (df['Year'] == 2015) & (df['County'] == place) & (df['Description'] == 'New')
# ]
# data.append([place, round(new_df['Price'].mean())])

# our_averages_2015 = cleaned.pd.DataFrame(data, columns=["Place", 'Price'])
# print(our_averages_2015)
# graph = px.bar(our_averages_2015, x="Place", y="Price", title="Price of new 2015 houses across Ireland")
# graph.write_html("newhtml.html", full_html=False, include_plotlyjs=False)
Loading

0 comments on commit e2777ab

Please sign in to comment.