Commit

feat: Adding new data
vivekkatial committed Jun 12, 2024
1 parent ca87413 commit 26babf5
Showing 17 changed files with 38,571 additions and 4,566 deletions.
637 changes: 637 additions & 0 deletions data/12-nodes/matilda_processed_graph-all.csv
637 changes: 637 additions & 0 deletions data/12-nodes/matilda_processed_graph-unweight-only.csv
3,947 changes: 3,947 additions & 0 deletions data/12-nodes/matilda_processed_graph-weight-only.csv
637 changes: 637 additions & 0 deletions data/12-nodes/matilda_processed_graph_weight-all.csv
3,947 changes: 3,947 additions & 0 deletions data/12-nodes/matilda_processed_graph_weight-weight-only.csv
637 changes: 637 additions & 0 deletions data/12-nodes/matilda_processed_weight-all.csv
4,583 changes: 4,583 additions & 0 deletions data/12-nodes/matilda_processed_weighted_unweighted-all.csv
3,947 changes: 3,947 additions & 0 deletions data/12-nodes/matilda_processed_weighted_unweighted-weight-only.csv
10,907 changes: 10,907 additions & 0 deletions data/initialisation_results_all_nodes.csv
4,901 changes: 4,901 additions & 0 deletions data/initialisation_results_nodes-10.csv
1,261 changes: 1,261 additions & 0 deletions data/initialisation_results_nodes-11.csv
1,211 changes: 1,211 additions & 0 deletions data/initialisation_results_nodes-13.csv
1,261 changes: 1,261 additions & 0 deletions data/initialisation_results_nodes-9.csv
4,562 changes: 0 additions & 4,562 deletions data/matilda_processed_weight.csv (this file was deleted)
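For orientation, a minimal sketch of how one of the newly added per-node-size result files might be loaded and inspected. The column names num_nodes, graph_type, and weight_type are taken from the src/plots_informs.py diff below; the rest is illustrative, not part of this commit:

import pandas as pd

# Load one of the per-node-size initialisation result files added in this commit
df = pd.read_csv("data/initialisation_results_nodes-10.csv")

# Inspect the instance metadata columns used later in src/plots_informs.py
print(df.shape)
print(df["graph_type"].value_counts())
print(df["weight_type"].value_counts())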

2 changes: 1 addition & 1 deletion src/extract_runs.py
@@ -15,7 +15,7 @@ def main():
     try:
         # Connect to MLFlow experiment
         EXPERIMENT_NAME = "QAOA-Parameter-Initialisation"
-        NUM_NODES = 12
+        NUM_NODES = 11
         graph_types = [
             "Nearly Complete BiPartite",
             "Uniform Random",
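Since the extraction script is re-run with a hand-edited NUM_NODES for each graph size (9 through 13, judging by the data files above), one option is to pass the size on the command line instead. A sketch under that assumption; the --num-nodes flag is hypothetical and not part of this repository:

import argparse

# Hypothetical CLI wrapper replacing the hand-edited constant
parser = argparse.ArgumentParser(description="Extract QAOA initialisation runs from MLflow")
parser.add_argument("--num-nodes", type=int, default=12, help="graph size to extract")
args = parser.parse_args()

EXPERIMENT_NAME = "QAOA-Parameter-Initialisation"
NUM_NODES = args.num_nodes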
7 changes: 4 additions & 3 deletions src/matilda_prep.py
@@ -49,6 +49,8 @@ def load_and_process_data(file_path, source_type='graph_weight', feature_filter=
 
     # Filter out rows with 'feature_weight_type' equal to 'None'
     selected_df = selected_df[selected_df['feature_weight_type'] != 'None']
+
+    # selected_df = selected_df[selected_df['feature_weight_type'] == 'None']
 
     # Create the 'Source' column based on the source_type parameter
     if source_type == 'graph_weight':
@@ -171,11 +173,10 @@ def test_only_numeric_features():
 
 if __name__ == "__main__":
     file_path = "data/initialisation_results_nodes-12.csv"
-    source_types = ['graph_weight', 'graph', 'weight']
-
+    source_types = ['graph_weight', 'graph', 'weight', 'weighted_unweighted']
     for source_type in source_types:
         d_matilda = load_and_process_data(file_path, source_type, feature_filter=False)
-        output_file = f"data/12-nodes/matilda_processed_{source_type}.csv"
+        output_file = f"data/12-nodes/matilda_processed_{source_type}-weight-only.csv"
 
         # Write to csv file
         d_matilda.to_csv(output_file, index=False)
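The new 'weighted_unweighted' source type can also be exercised directly. A minimal sketch, assuming matilda_prep is importable from the project root (the import path is an assumption):

from src.matilda_prep import load_and_process_data

# Process the 12-node results with the new source type and write the
# weight-only variant, mirroring the updated __main__ block above
d = load_and_process_data(
    "data/initialisation_results_nodes-12.csv",
    source_type="weighted_unweighted",
    feature_filter=False,
)
d.to_csv("data/12-nodes/matilda_processed_weighted_unweighted-weight-only.csv", index=False)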
53 changes: 53 additions & 0 deletions src/plots_informs.py
@@ -27,6 +27,7 @@ def plot_ar_distribution(df, output_file='ar_distribution.png'):
     # Create the FacetGrid
     g = sns.FacetGrid(df_melted, col='Approximation Type', col_wrap=4, height=4, sharex=True, sharey=True)
     g.map(sns.histplot, 'Approximation Ratio', bins=20, kde=False, color='blue', alpha=0.3)
+
 
     g.set(xlim=(0, 1))
 
@@ -49,3 +50,55 @@ def plot_ar_distribution(df, output_file='ar_distribution.png'):
 
 # Plot the distribution and save the plot
 plot_ar_distribution(df)
+
+# Read in the CSV files for all nodes
+node_sizes = [9, 10, 11, 12, 13]
+# Create an empty list to store the dataframes
+dfs = []
+for node_size in node_sizes:
+    fn = f'data/initialisation_results_nodes-{node_size}.csv'
+    df = pd.read_csv(fn)
+    # Cols that start with algo
+    algo_cols = [col for col in df.columns if col.startswith('algo')]
+    # AR cols
+    ar_cols = ['approximation_ratio_fixed_angles_constant',
+               'approximation_ratio_random',
+               'approximation_ratio_three_regular',
+               'approximation_ratio_qibpi',
+               'approximation_ratio_tqa',
+               'approximation_ratio_interp_p15',
+               'approximation_ratio_fourier_p15']
+
+    # Instance Class and Instance Size
+    instance_cols = ['num_nodes', 'graph_type', 'weight_type']
+
+    # Selected columns
+    selected_cols = ['run_id'] + instance_cols + ar_cols + algo_cols
+    df = df[selected_cols]
+
+    # Validate the columns
+    if not set(algo_cols).issubset(set(df.columns)):
+        print(f"Columns {algo_cols} not found in the dataframe {fn}.")
+        continue
+    else:
+        dfs.append(df)
+
+    # Print the number of rows and columns
+    print(f"Dataframe {fn} has {df.shape[0]} rows and {df.shape[1]} columns.")
+
+
+# Concatenate the dataframes
+df_all = pd.concat(dfs, ignore_index=True)
+
+# Print columns
+print(df_all.columns)
+print(df_all.shape)
+# Remove rows with NaN values
+df_all = df_all.dropna()
+print(df_all.head())
+
+# Print num instances for the number of nodes
+print(df_all['num_nodes'].value_counts())
+
+# Write to a CSV file
+df_all.to_csv('data/initialisation_results_all_nodes.csv', index=False)
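Note that plot_ar_distribution consumes a df_melted frame whose construction is not shown in this excerpt. A sketch of the reshaping that would yield the 'Approximation Type' and 'Approximation Ratio' columns used by the FacetGrid, assuming the ar_cols list defined above; the id_vars choice is an assumption:

# Hypothetical melt: wide approximation_ratio_* columns become a label column
# ('Approximation Type') and a value column ('Approximation Ratio')
df_melted = df_all.melt(
    id_vars=['run_id', 'num_nodes', 'graph_type', 'weight_type'],
    value_vars=ar_cols,
    var_name='Approximation Type',
    value_name='Approximation Ratio',
)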
