Skip to content

Commit

Permalink
Move sync-identical-files.py into public repo as sync-files.py
Browse the repository at this point in the history
We currently use a script to keep certain duplicate QL files in sync across the repo. For historical reasons, this script has lived in the private repo alongside the rest of CodeQL, even though it's only used for files in the public `ql` repo. This PR moves the script into the public `ql` repo. It is still invoked by Jenkins scripts that live in the private repo during CI, but it can also be invoked directly without having a checkout of the private repo. This is useful for anyone who is modifying the dataflow or IR libraries with only a QL checkout.
  • Loading branch information
dbartol committed Mar 29, 2020
1 parent 1baf5df commit 0952064
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
.vs/*
!.vs/VSWorkspaceSettings.json

# Byte-compiled python files
*.pyc

# It's useful (though not required) to be able to unpack codeql in the ql checkout itself
/codeql/
.vscode/settings.json
Expand Down
140 changes: 140 additions & 0 deletions config/sync-files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python3

# Due to various technical limitations, we sometimes have files that need to be
# kept identical in the repository. This script loads a database of such
# files and can perform two functions: check whether they are still identical,
# and overwrite the others with a master copy if needed.

import hashlib
import shutil
import os
import sys
import json
import re
from os import path

file_groups = {}

def add_prefix(prefix, relative):
result = path.join(prefix, relative)
if path.commonprefix((path.realpath(result), path.realpath(prefix))) != \
path.realpath(prefix):
raise Exception("Path {} is not below {}".format(
result, prefix))
return result

def load_if_exists(prefix, json_file_relative):
json_file_name = path.join(prefix, json_file_relative)
if path.isfile(json_file_name):
print("Loading file groups from", json_file_name)
with open(json_file_name, 'r', encoding='utf-8') as fp:
raw_groups = json.load(fp)
prefixed_groups = {
name: [
add_prefix(prefix, relative)
for relative in relatives
]
for name, relatives in raw_groups.items()
}
file_groups.update(prefixed_groups)

# Generates a list of C# test files that should be in sync
def csharp_test_files():
test_file_re = re.compile('.*(Bad|Good)[0-9]*\\.cs$')
csharp_doc_files = {
file:os.path.join(root, file)
for root, dirs, files in os.walk("csharp/ql/src")
for file in files
if test_file_re.match(file)
}
return {
"C# test '" + file + "'" : [os.path.join(root, file), csharp_doc_files[file]]
for root, dirs, files in os.walk("csharp/ql/test")
for file in files
if file in csharp_doc_files
}

def file_checksum(filename):
with open(filename, 'rb') as file_handle:
return hashlib.sha1(file_handle.read()).hexdigest()

def check_group(group_name, files, master_file_picker, emit_error):
checksums = {file_checksum(f) for f in files}

if len(checksums) == 1:
return

master_file = master_file_picker(files)
if master_file is None:
emit_error(__file__, 0,
"Files from group '"+ group_name +"' not in sync.")
emit_error(__file__, 0,
"Run this script with a file-name argument among the "
"following to overwrite the remaining files with the contents "
"of that file or run with the --latest switch to update each "
"group of files from the most recently modified file in the group.")
for filename in files:
emit_error(__file__, 0, " " + filename)
else:
print(" Syncing others from", master_file)
for filename in files:
if filename == master_file:
continue
print(" " + filename)
os.replace(filename, filename + '~')
shutil.copy(master_file, filename)
print(" Backups written with '~' appended to file names")

def chdir_repo_root():
root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')
os.chdir(root_path)

def choose_master_file(master_file, files):
if master_file in files:
return master_file
else:
return None

def choose_latest_file(files):
latest_time = None
latest_file = None
for filename in files:
file_time = os.path.getmtime(filename)
if (latest_time is None) or (latest_time < file_time):
latest_time = file_time
latest_file = filename
return latest_file

local_error_count = 0
def emit_local_error(path, line, error):
print('ERROR: ' + path + ':' + line + " - " + error)
global local_error_count
local_error_count += 1

# This function is invoked directly by a CI script, which passes a different error-handling
# callback.
def sync_identical_files(emit_error):
if len(sys.argv) == 1:
master_file_picker = lambda files: None
elif len(sys.argv) == 2:
if sys.argv[1] == "--latest":
master_file_picker = lambda files: choose_latest_file(files)
elif os.path.isfile(sys.argv[1]):
master_file_picker = lambda files: choose_master_file(sys.argv[1], files)
else:
raise Exception("File not found")
else:
raise Exception("Bad command line or file not found")
chdir_repo_root()
load_if_exists('.', 'config/identical-files.json')
file_groups.update(csharp_test_files())
for group_name, files in file_groups.items():
check_group(group_name, files, master_file_picker, emit_error)

def main():
sync_identical_files(emit_local_error)
if local_error_count > 0:
exit(1)

if __name__ == "__main__":
main()

1 comment on commit 0952064

@myersmma
This comment was marked as off-topic.
Please sign in to comment.