feat: Adding ability for 3-layer params
vivekkatial committed May 29, 2024
1 parent 8f6a97d commit a249058
Showing 8 changed files with 21,370 additions and 10 deletions.
2 changes: 1 addition & 1 deletion bin/build_qibpi.slurm
@@ -32,4 +32,4 @@ apptainer exec \
     --graph_type "$1" \
     --num_nodes 8 \
     --weight_type "$2" \
-    --n_layers 15
+    --n_layers 3
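
The batch script now requests a 3-layer QAOA ansatz instead of 15. A minimal sketch of how the flags passed by build_qibpi.slurm might be consumed on the Python side, assuming an argparse entry point that mirrors the slurm arguments (the parser wiring here is an assumption, not the repository's actual entry script):

# Hedged sketch, not the repository's actual entry point: parsing the
# flags that build_qibpi.slurm forwards into the container.
import argparse

parser = argparse.ArgumentParser(description="QIBPI parameter initialisation")
parser.add_argument("--graph_type", type=str, required=True)
parser.add_argument("--num_nodes", type=int, default=8)
parser.add_argument("--weight_type", type=str, required=True)
parser.add_argument("--n_layers", type=int, default=3)  # QAOA depth p (was 15)

args = parser.parse_args()
# A p-layer QAOA has 2p variational parameters (p gamma and p beta angles),
# so dropping p from 15 to 3 shrinks the search space from 30 to 6 angles.
print(f"{args.n_layers}-layer QAOA on {args.num_nodes}-node {args.graph_type} graphs")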
3,947 changes: 3,947 additions & 0 deletions data/12-nodes/matilda_processed_graph.csv

Large diffs are not rendered by default.

3,947 changes: 3,947 additions & 0 deletions data/12-nodes/matilda_processed_graph_weight.csv

Large diffs are not rendered by default.

3,947 changes: 3,947 additions & 0 deletions data/12-nodes/matilda_processed_weight.csv

Large diffs are not rendered by default.

4,583 changes: 4,583 additions & 0 deletions data/12-nodes/matilda_processed_weighted_unweighted.csv

Large diffs are not rendered by default.

4,897 changes: 4,897 additions & 0 deletions data/initialisation_results_nodes-12.csv

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions src/extract_runs.py
@@ -15,7 +15,7 @@ def main():
     try:
         # Connect to MLFlow experiment
         EXPERIMENT_NAME = "QAOA-Parameter-Initialisation"
-        NUM_NODES = 10
+        NUM_NODES = 12
         graph_types = [
             "Nearly Complete BiPartite",
             "Uniform Random",
@@ -85,8 +85,8 @@ def main():
 
     if all_runs:
         d_results = pd.concat(all_runs, ignore_index=True)
-        d_results.to_csv("data/initialisation_results.csv", index=False)
-        logging.info(f"Saved {len(d_results)} runs to data/initialisation_results.csv")
+        d_results.to_csv(f"data/initialisation_results_nodes-{NUM_NODES}.csv", index=False)
+        logging.info(f"Saved {len(d_results)} runs to data/initialisation_results_nodes-{NUM_NODES}.csv")
     else:
         logging.info("No runs found for the specified parameters.")
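
This change re-points the extraction at the 12-node experiment and stamps the node count into the output filename, so per-size extracts no longer overwrite one another. A hedged sketch of the surrounding MLflow pattern, assuming runs log graph_type and num_nodes as parameters (the params.* filter keys are assumptions, not confirmed by the diff):

# Hedged sketch of the extraction loop around this change.
import mlflow
import pandas as pd

EXPERIMENT_NAME = "QAOA-Parameter-Initialisation"
NUM_NODES = 12

experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    raise SystemExit(f"Experiment '{EXPERIMENT_NAME}' not found")

all_runs = []
for graph_type in ["Nearly Complete BiPartite", "Uniform Random"]:
    runs = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        filter_string=f"params.graph_type = '{graph_type}' "
                      f"and params.num_nodes = '{NUM_NODES}'",
    )
    if not runs.empty:
        all_runs.append(runs)

if all_runs:
    d_results = pd.concat(all_runs, ignore_index=True)
    # Node count in the filename keeps 10- and 12-node extracts separate.
    d_results.to_csv(f"data/initialisation_results_nodes-{NUM_NODES}.csv", index=False)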
51 changes: 45 additions & 6 deletions src/matilda_prep.py
@@ -5,7 +5,7 @@
 # Hide pandas warnings
 pd.options.mode.chained_assignment = None
 
-def load_and_process_data(file_path, source_type='graph_weight'):
+def load_and_process_data(file_path, source_type='graph_weight', feature_filter=False):
     """
     Load and process the CSV file.
@@ -47,6 +47,9 @@ def load_and_process_data(file_path, source_type='graph_weight'):
     # Handle NaN in weight_type
     selected_df['feature_weight_type'] = selected_df['feature_weight_type'].fillna('None')
 
+    # Filter out rows with 'feature_weight_type' equal to 'None'
+    selected_df = selected_df[selected_df['feature_weight_type'] != 'None']
+
     # Create the 'Source' column based on the source_type parameter
     if source_type == 'graph_weight':
         selected_df['Source'] = selected_df['feature_graph_type'] + ' ' + selected_df['feature_weight_type']
@@ -62,6 +65,40 @@ def load_and_process_data(file_path, source_type='graph_weight'):
     # Remove the original 'feature_weight_type' and 'feature_graph_type' columns
     selected_df.drop(columns=['feature_weight_type', 'feature_graph_type'], inplace=True)
 
+    # Feature filtering (remove non-weight related columns)
+    if feature_filter:
+
+        features = [
+            # Weighted features
+            "feature_maximum_weighted_degree",
+            "feature_max_weight",
+            "feature_mean_weight",
+            "feature_median_weight",
+            "feature_minimum_weighted_degree",
+            "feature_min_weight",
+            "feature_range_weight",
+            "feature_skewness_weight",
+            "feature_std_dev_weight",
+            "feature_variance_weight",
+            "feature_weighted_average_clustering",
+            "feature_weighted_average_shortest_path_length",
+            "feature_weighted_diameter",
+            "feature_weighted_radius",
+
+            # Laplacian features
+            "feature_laplacian_largest_eigenvalue",
+            "feature_laplacian_second_largest_eigenvalue",
+            "feature_ratio_of_two_largest_laplacian_eigenvaleus",
+
+            # Symmetry-related features
+            "feature_number_of_orbits",
+            "feature_group_size",
+            "feature_is_distance_regular",
+            "feature_regular"
+        ]
+
+        selected_df = selected_df[['run_id', 'Source'] + algo_cols + features]
+
     # Check for missing values
     missing_values = selected_df.isnull().any(axis=1).sum()
     print(f"Missing values: {missing_values}/{selected_df.shape[0]} ({missing_values / selected_df.shape[0]:.2%})")
@@ -130,15 +167,15 @@ def test_only_numeric_features():
     assert len(non_numeric_cols) == 0, f"Non-numeric columns: {', '.join(non_numeric_cols)}"
 
 ## Load and process the data
-d_matilda = load_and_process_data("data/initialisation_results-10-nodes.csv")
+d_matilda = load_and_process_data("data/initialisation_results_nodes-12.csv")
 
 if __name__ == "__main__":
-    file_path = "data/initialisation_results-10-nodes.csv"
-    source_types = ['graph_weight', 'graph', 'weight', 'weighted_unweighted']
+    file_path = "data/initialisation_results_nodes-12.csv"
+    source_types = ['graph_weight', 'graph', 'weight']
 
     for source_type in source_types:
-        d_matilda = load_and_process_data(file_path, source_type)
-        output_file = f"data/matilda_processed_{source_type}.csv"
+        d_matilda = load_and_process_data(file_path, source_type, feature_filter=False)
+        output_file = f"data/12-nodes/matilda_processed_{source_type}.csv"
 
         # Write to csv file
         d_matilda.to_csv(output_file, index=False)
@@ -147,6 +184,8 @@ def test_only_numeric_features():
         print(f"Processed data for source type '{source_type}':")
         print(d_matilda.head())
         print(d_matilda.info())
+        # Print the source distribution
+        print(d_matilda['Source'].value_counts())
         print(f"Writing to {output_file}...")
 
 pytest.main([__file__])
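
The new feature_filter switch (left off in the __main__ block above) restricts the MATILDA export to the weight, Laplacian, and symmetry feature groups whitelisted in the diff. A hedged usage sketch, assuming the module is importable and the input CSV carries those columns (note the module as committed also runs its top-level load and pytest.main on import):

# Hedged usage sketch for the new feature_filter flag; column availability
# depends on the upstream feature-extraction pipeline.
from matilda_prep import load_and_process_data

path = "data/initialisation_results_nodes-12.csv"

d_full = load_and_process_data(path, source_type="graph_weight")
d_slim = load_and_process_data(path, source_type="graph_weight", feature_filter=True)

# The filtered frame keeps only run_id, Source, the algo_cols columns, and
# the 21 whitelisted features, so it should be no wider than the full one.
assert d_slim.shape[1] <= d_full.shape[1]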
