Skip to content

Commit

Permalink
Added more functions to analyze data with pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
bstellato committed Feb 24, 2017
1 parent 2044a78 commit 738e6d2
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 15 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,4 @@ out/
# Data Files
# -------------------------------------------------------------------
interfaces/python/tests/qp_problems/results/
interfaces/python/tests/qp_problems/figures/
140 changes: 127 additions & 13 deletions interfaces/python/tests/qp_problems/fit_results.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,145 @@
from __future__ import print_function
import matplotlib as mpl
mpl.use('Agg') # For plotting on remote server
# mpl.use('Agg') # For plotting on remote server
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
pd.set_option('display.width', 1000)


# import sklearn tools
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline # Make pipeline for estimators
from sklearn.preprocessing import PolynomialFeatures # Construct polynomials
from sklearn.linear_model import (LinearRegression, HuberRegressor, Ridge)
from sklearn.metrics import mean_squared_error

# # Define candidate function to be fit
# def func_iter(x, c0, c1, c2, c3, c4):
# # import sklearn tools
# from sklearn.model_selection import train_test_split
# from sklearn.pipeline import make_pipeline # Make pipeline for estimators
# from sklearn.preprocessing import PolynomialFeatures # Construct polynomials
# from sklearn.linear_model import (LinearRegression, HuberRegressor, Ridge)
# from sklearn.metrics import mean_squared_error
#
# return c0*np.power(x[0], c1*x[3] + c2*x[4])*np.power(x[1], c3)*np.power(x[2], c4)
# # # Define candidate function to be fit
# # def func_iter(x, c0, c1, c2, c3, c4):
# #
# # return c0*np.power(x[0], c1*x[3] + c2*x[4])*np.power(x[1], c3)*np.power(x[2], c4)


def get_best_params(df):
    """
    Reduce a weighted frame to the row(s) holding its best parameters.

    Only rows with weight ``w`` equal to 1 are considered. Among those,
    the rows with the smallest ``sigma`` are kept; if more than one row
    remains, the selection is narrowed to the smallest ``alpha``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns 'w', 'sigma' and 'alpha'.

    Returns
    -------
    pandas.DataFrame
        The selected row(s). More than one row may still be returned when
        several rows tie on both 'sigma' and 'alpha'.
    """
    # Keep only the samples with full weight
    best = df[df['w'] == 1.]

    # Narrow down to the smallest sigma among them
    best = best[best['sigma'] == best['sigma'].min()]

    # Break remaining ties with the smallest alpha
    if len(best) > 1:
        best = best[best['alpha'] == best['alpha'].min()]

    return best




def weight_by_iter(df):
    """
    Weight samples by their iteration count relative to the smallest one.

    Each row receives ``w = min(iter) / iter``, so the row(s) with the
    fewest iterations get weight 1 and all others a weight below 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'iter' column.

    Returns
    -------
    pandas.DataFrame
        A new frame equal to ``df`` with the additional (or replaced)
        column 'w'. The input frame is left untouched.
    """
    # Return a new frame instead of writing into the argument: this function
    # is used as a groupby(...).apply callback, where mutating the passed
    # group is not supported by pandas.
    return df.assign(w=df['iter'].min() / df['iter'])


def save_plot(df, name):
    """
    Plot the behavior of parameter 'name' in the selected dataframe.

    The other two parameters among 'sigma', 'alpha' and 'rho' are fixed to
    the values found in the last row with weight ``w == 1``. The remaining
    rows are shown in two side-by-side scatter plots ('iter' vs. ``name``
    and 'w' vs. ``name``) and the figure is saved to 'figures/<name>.pdf'.

    Parameters
    ----------
    df : pandas.DataFrame
        Weighted frame with the columns 'alpha', 'sigma', 'rho', 'iter'
        and 'w'.
    name : str
        Parameter to plot on the x axis: 'sigma', 'alpha' or 'rho'.
    """
    # Starting mask, intended to be all True (holds while every alpha > 0)
    location = (df['alpha'] > 0)

    # Rows attaining the best (unit) weight; there may be many
    best = df.loc[(df['w'] == 1.)]

    # Fix every parameter except the plotted one to its value in the last
    # best row. Strings are compared with != (equality), not identity.
    for param in ('sigma', 'alpha', 'rho'):
        if name != param:
            location &= (df[param] == best[param].values[-1])

    # Get test case in specified location
    test_case = df.loc[location]

    # Plot behavior: iterations on the left, weights on the right
    plt.figure(figsize=(12, 6))
    panels = (('iter', 'iter'), ('w', 'weight'))
    for position, (column, ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        ax = plt.gca()
        if name == 'rho':
            # rho is drawn on a logarithmic x axis
            ax.set_xscale('log')
        plt.scatter(test_case[name], test_case[column])
        ax.set_ylabel(ylabel)
        ax.set_xlabel(name)
        plt.grid()
        plt.show(block=False)

    plt.tight_layout()
    plt.savefig('figures/%s.pdf' % name)


# Main function
if __name__ == '__main__':

# Read results (only the ones with fewer than max_iter iterations)
res = pd.read_csv('results/results_full.csv')
res = res.loc[(res['iter'] < 2400)]
res = res.loc[(res['iter'] < 2499)] # Select problems not saturated at max number of iterations

# Problem headings
headings = ['n', 'm', 'name', 'seed']

# Group problems
problems = res.groupby(headings)
# n_problems = len(problems.groups)

# Assign weights to samples
res_w = problems.apply(weight_by_iter)
problems_w = res_w.groupby(headings)


# Plot behavior for fixed sigma and alpha and changing rho
# test_name = (50.0, 60.0, 'svm', 3076953921.0)
# test_name = (50.0, 60.0, 'svm', 107769053.0)
test_name = (40.0, 40.0, 'lasso', 685148778.0)
# test_name = (40.0, 40.0, 'lasso', 4089288235.0)

test_instance = problems_w.get_group(test_name)

# Save plots for rho, sigma and alpha
# save_plot(test_instance, 'rho')
# save_plot(test_instance, 'sigma')
# save_plot(test_instance, 'alpha')



# Get optimal parameters for lasso problem
same_type_probs = res_w.groupby(['name'])
lasso_probs = same_type_probs.get_group(('lasso'))
best_lasso = lasso_probs.groupby(['seed']).apply(get_best_params)
pd.tools.plotting.scatter_matrix(best_lasso)



# Select smaller dataset and consider only n, m, trP
# res = res.loc[(res['m'] < 100) & (res['n'] < 100)]
Expand Down
8 changes: 6 additions & 2 deletions interfaces/python/tests/qp_problems/qp_examples/qp_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
from builtins import range
from builtins import object
import numpy as np
from utils.data_struct import data_struct, full_data_struct
import sys # To get maxsize

# Metadata class
import abc
from future.utils import with_metaclass

# Data structures
from utils.data_struct import data_struct, full_data_struct

class QPExample(with_metaclass(abc.ABCMeta, object)):

Expand Down Expand Up @@ -53,7 +57,7 @@ def perform_tests(self, **kwargs):
for _ in range(self.nm_num_prob): # Generate some random problems

# Get current seed
current_seed = np.random.randint(0, 4294967295)
current_seed = np.random.randint(0, sys.maxsize)
np.random.seed(current_seed)

# generate problem and store statistics
Expand Down

0 comments on commit 738e6d2

Please sign in to comment.