-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathblock.py
62 lines (51 loc) · 1.7 KB
/
block.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
import argparse
import glob
import os
import subprocess
from pathlib import Path
from zipfile import ZipFile
def parse_arguments():
    """Parse and validate the command-line options for the blocking step.

    Recognised options:
        --schemafile: path to the blocking schema
            (default ``example-schema/blocking-schema/lambda.json``).
        --clkpath: folder containing the CLK ``.json`` files
            (default ``output``).

    Returns:
        The populated ``argparse.Namespace`` with ``schemafile`` and
        ``clkpath`` attributes (both strings).

    Raises:
        SystemExit: Raised through ``parser.error`` (usage message on stderr)
            when the schema file does not exist or the CLK folder is not an
            existing directory.
    """
    parser = argparse.ArgumentParser(
        description="Tool for garbling PII for PPRL purposes in the CODI project"
    )
    parser.add_argument(
        "--schemafile",
        default="example-schema/blocking-schema/lambda.json",
        help="Path to blocking schema."
        " Default: example-schema/blocking-schema/lambda.json",
    )
    parser.add_argument(
        "--clkpath",
        default="output",
        help="Specify a folder containing clks. Default is 'output' folder",
    )
    args = parser.parse_args()
    if not Path(args.schemafile).exists():
        parser.error("Unable to find schema file: " + args.schemafile)
    # Fail fast on a missing CLK folder: globbing a non-existent directory
    # matches nothing, which would otherwise produce an empty archive without
    # any warning.
    if not Path(args.clkpath).is_dir():
        parser.error("Unable to find clk folder: " + args.clkpath)
    return args
def block_individuals(args):
    """Run ``anonlink block`` on every CLK file and collect the outputs.

    Every ``*.json`` file directly inside ``args.clkpath`` is passed to the
    ``anonlink block`` command together with the schema at
    ``args.schemafile``. The output for each input is written to the
    ``temp-data`` folder under the input's own file name.

    Args:
        args: Parsed command-line namespace providing ``schemafile`` and
            ``clkpath``.

    Returns:
        A list of ``Path`` objects, one per output file in ``temp-data``,
        ordered by sorted input path. Empty when no ``*.json`` file is found.

    Raises:
        subprocess.CalledProcessError: If ``anonlink`` exits with a non-zero
            status for any input (``check=True``).
    """
    temp_dir = Path("temp-data")
    os.makedirs(temp_dir, exist_ok=True)
    # Nothing is written to "output" here; it is created up front because the
    # final archive is placed there.
    os.makedirs("output", exist_ok=True)

    schema_file = Path(args.schemafile)
    blocked_files = []
    # glob gives no ordering guarantee; sorting keeps the processing order
    # (and so the returned list) reproducible between runs.
    for clk in sorted(glob.glob(os.path.join(args.clkpath, "*.json"))):
        clk_path = Path(clk)
        # Path.name honours the platform's separator. Splitting on "/" left
        # the directory part attached on Windows, pointing the output at a
        # sub-folder of temp-data that does not exist.
        temp_file = temp_dir / clk_path.name
        subprocess.run(
            ["anonlink", "block", str(clk_path), str(schema_file), str(temp_file)],
            check=True,
        )
        blocked_files.append(temp_file)
    return blocked_files
def zip_blocked_files(blocked_files):
    """Bundle the given files into ``output/garbled_blocked.zip``.

    The archive is opened in write mode, so an existing archive at that path
    is replaced. No explicit archive name is supplied for the members, so
    ``ZipFile.write`` derives each member name from the path it is given.

    Args:
        blocked_files: Iterable of paths to add to the archive.
    """
    archive_location = "output/garbled_blocked.zip"
    with ZipFile(archive_location, mode="w") as archive:
        for member in blocked_files:
            archive.write(member)
def main():
    """Script entry point: parse options, block the CLK files, zip the results."""
    # Each stage consumes the previous stage's return value directly.
    zip_blocked_files(block_individuals(parse_arguments()))
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()