experiment.py
import logging
import pickle
import warnings

import click
import networkx as nx
import pandas as pd
import pygsp
from tqdm import tqdm

from utilities import (
    has_isolated_nodes,
    khop_subgraph,
    laplacian_distance,
    non_pendant_edges,
    sample_edges,
)

# Silence FutureWarnings and disable all logging output for cleaner experiment runs.
warnings.filterwarnings("ignore", category=FutureWarning)
logging.disable(logging.CRITICAL)

@click.command()
@click.option("--repeats", type=int, help="Number of times to repeat the experiment")
@click.option("--dataset", type=str, help="Type of graph to perturb (BA or Sensor)")
@click.option("--proportion_remove", type=float, help="Proportion of edges to remove from the graph")
@click.option("--n", type=int, help="Size of the generated graphs")
@click.option("--m", default=3, type=int, help="Number of edges to attach from a new node to existing nodes in the BA graph")
@click.option("--k", default=None, type=int, help="Size of the k-hop neighbourhood")
def main(dataset, repeats, proportion_remove, k, n, m):
    graph_generator = GraphGenerator(dataset, n, m)
    if k is None:
        experiment_name = f"{graph_generator}_{proportion_remove}"
    else:
        experiment_name = f"{graph_generator}_{proportion_remove}_{k}"
    print(f"Running experiment {experiment_name}")

    # Generate perturbed graphs
    graphs = []
    dfs = []
    for repeat in tqdm(range(repeats)):
        # Resample the graph until khop_edge_deletion finds a valid perturbation
        G = graph_generator.generate()
        remove = int(proportion_remove * G.number_of_edges())
        result = khop_edge_deletion(G, k, remove)
        while result is None:
            G = graph_generator.generate()
            remove = int(proportion_remove * G.number_of_edges())
            result = khop_edge_deletion(G, k, remove)
        Gp, edges, node = result
        graphs.append([G, Gp])

        # Record the removed edges together with the experiment metadata
        df = edges_to_df(edges)
        df["khopnode"] = node
        df["repeat"] = repeat
        df["distance"] = laplacian_distance(G, Gp)
        dfs.append(df)
    dfs = pd.concat(dfs, ignore_index=True)

    # Save results
    with open(f"results/{experiment_name}.p", "wb") as f:
        pickle.dump(graphs, f)
    dfs.to_csv(f"results/{experiment_name}.csv")

class GraphGenerator:
    """Generates random graphs of the requested type (BA or Sensor)."""

    def __init__(self, dataset, n, m=None):
        self.dataset = dataset
        self.n = n
        self.m = m

    def generate(self):
        if self.dataset == "BA":
            return nx.barabasi_albert_graph(self.n, self.m)
        elif self.dataset == "Sensor":
            # Resample until the sensor graph is connected, then convert to networkx
            G = pygsp.graphs.Sensor(self.n)
            while not G.is_connected():
                G = pygsp.graphs.Sensor(self.n)
            return nx.Graph(G.W)
        raise ValueError(f"Unknown dataset: {self.dataset}")

    def __repr__(self):
        if self.dataset == "BA":
            return f"BA({self.n},{self.m})"
        elif self.dataset == "Sensor":
            return f"Sensor({self.n})"
        return self.dataset

def edges_to_df(edges):
    """Store each edge as a (u, v) row with u <= v so edges are canonically ordered."""
    data = [[min(edge), max(edge)] for edge in edges]
    return pd.DataFrame(data, columns=["u", "v"])

def khop_edge_deletion(G, k, r, max_iter=1000):
    """
    Remove r edges that lie in the k-hop neighbourhood of a randomly chosen node,
    such that the perturbed graph has no isolated nodes. If k is None, edges are
    sampled uniformly from the whole graph. Returns (perturbed graph, removed edges,
    centre node), or None if no valid perturbation is found within max_iter attempts.
    """
    solution, iteration = None, 0
    while solution is None:
        iteration = iteration + 1
        if iteration == max_iter:
            return None
        # Restrict to a k-hop subgraph around a random node (or use the whole graph)
        subgraph, node = khop_subgraph(G, k) if k is not None else (G, None)
        if len(non_pendant_edges(subgraph)) < r:
            continue
        # Sample only non-pendant edges to reduce the chance of isolating a node,
        # then verify that the perturbed graph really has no isolated nodes
        edges = sample_edges(subgraph, r, non_pendant=True)
        Gp = G.copy()
        Gp.remove_edges_from(edges)
        if not has_isolated_nodes(Gp):
            solution = Gp
    return solution, edges, node
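
# A minimal usage sketch for khop_edge_deletion (the parameter values are illustrative):
#
#     G = nx.barabasi_albert_graph(100, 3)
#     result = khop_edge_deletion(G, k=2, r=5)
#     if result is not None:
#         Gp, removed_edges, centre = result   # perturbed graph, deleted edges, k-hop centre node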

if __name__ == "__main__":
    main()
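
# Example invocation (a sketch; the option values are illustrative and assume a
# results/ directory already exists):
#
#     python experiment.py --dataset BA --n 100 --m 3 --repeats 10 \
#         --proportion_remove 0.05 --k 2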