Skip to content

Commit

Permalink
Add snowdrop upgrade command (0.0/0.1 to 0.2)
Browse files Browse the repository at this point in the history
Finds standalone scripts in snowdrop/upgrade/upgrade_*.py and exposes each one as a CLI subcommand.
The scripts should rely only on currently available library behaviour; everything else needs to be hardcoded into them.
  • Loading branch information
rcoup committed Oct 7, 2019
1 parent 5afc2ba commit 8a49e90
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
pytest.xml
.coverage
/.pytest*
/dist
/build
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from setuptools import setup
from setuptools import setup, find_packages

setup(
name='snowdrop',
Expand All @@ -8,7 +8,7 @@
author='Koordinates Limited',
author_email='support@koordinates.com',
license='Proprietary',
packages=['snowdrop'],
packages=find_packages(),
zip_safe=False,
entry_points={
'console_scripts': [
Expand Down
3 changes: 2 additions & 1 deletion snowdrop/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pygit2

from . import core # noqa
from . import checkout, clone, commit, diff, init, fsck, merge, pull, status, query
from . import checkout, clone, commit, diff, init, fsck, merge, pull, status, query, upgrade


def print_version(ctx, param, value):
Expand Down Expand Up @@ -67,6 +67,7 @@ def cli(ctx, repo_dir):
cli.add_command(pull.pull)
cli.add_command(status.status)
cli.add_command(query.query)
cli.add_command(upgrade.upgrade)


@cli.command("workingcopy-set-path")
Expand Down
27 changes: 27 additions & 0 deletions snowdrop/upgrade/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import importlib.util
from pathlib import Path

import click


class UpgradeCommand(click.MultiCommand):
    """
    Click multi-command whose subcommands are discovered from upgrade scripts.

    Every ``upgrade_<name>.py`` file found next to this module becomes a
    subcommand called ``<name>`` (underscores shown as dashes). Each script
    module is expected to expose a click command object named ``upgrade``.
    """

    # Directory that is scanned for upgrade_*.py scripts.
    plugin_path = Path(__file__).parent

    # Filename prefix identifying an upgrade script, and the matching
    # dotted-module prefix used when importing it.
    _script_prefix = "upgrade_"
    _module_prefix = "snowdrop.upgrade.upgrade_"

    def list_commands(self, ctx):
        """Return the sorted subcommand names, one per upgrade script on disk."""
        prefix_len = len(self._script_prefix)
        return sorted(
            script.stem[prefix_len:].replace('_', '-')
            for script in self.plugin_path.glob(f"{self._script_prefix}*.py")
        )

    def get_command(self, ctx, name):
        """
        Import the upgrade script for *name* and return its ``upgrade`` command.

        Returns None when no such script module exists, which is how click
        expects an unknown subcommand to be signalled (it then reports
        "No such command" instead of showing a traceback).
        """
        module_name = f"{self._module_prefix}{name.replace('-', '_')}"

        try:
            module = importlib.import_module(module_name)
        except ModuleNotFoundError as exc:
            # Only treat the upgrade script itself being absent as "unknown
            # command"; a missing dependency *inside* a script is a real error.
            if not (exc.name or "").startswith(self._module_prefix):
                raise
            return None
        return module.upgrade


# Group entry point: it has no behaviour of its own, because the subcommands
# are resolved by UpgradeCommand. The docstring doubles as the CLI help text.
@click.command(cls=UpgradeCommand)
def upgrade():
    """ Upgrade repositories between versions of Snowdrop """
178 changes: 178 additions & 0 deletions snowdrop/upgrade/upgrade_00_02.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#!/usr/bin/env python3

import json
import re
import subprocess
from datetime import datetime
from pathlib import Path

import click
import pygit2

from snowdrop.core import walk_tree
from snowdrop.structure import Dataset02


@click.command()
@click.argument('source', type=click.Path(exists=True, file_okay=False), required=True)
@click.argument('dest', type=click.Path(exists=False, writable=True), required=True)
@click.argument('layer', required=True)
def upgrade(source, dest, layer):
    """
    Upgrade a v0.0/v0.1 Snowdrop repository to Sno v0.2
    """
    # SOURCE: existing bare repository to read from.
    # DEST:   path of the new bare repository to create (must not exist yet).
    # LAYER:  name of the top-level tree in SOURCE holding the dataset.
    #
    # Every commit reachable from HEAD or any reference is rewritten into DEST
    # in parents-first order, then references are recreated to point at the
    # rewritten commits, and finally `git gc` is run on DEST.
    #
    # Raises click.BadParameter for invalid arguments, and ValueError when the
    # source data has an unexpected structure.
    source = Path(source)
    dest = Path(dest)

    if dest.exists():
        raise click.BadParameter(f"'{dest}': already exists", param_hint="DEST")

    try:
        source_repo = pygit2.Repository(str(source))
    except (KeyError, pygit2.GitError) as e:
        # pygit2 raises (rather than returning a falsy value) when the path
        # is not a repository; report it as a normal usage error.
        raise click.BadParameter(f"'{source}': not an existing repository", param_hint="SOURCE") from e
    if not source_repo or not source_repo.is_bare:
        raise click.BadParameter(f"'{source}': not an existing repository", param_hint="SOURCE")

    try:
        source_tree = (source_repo.head.peel(pygit2.Tree) / layer).obj
    except KeyError as e:
        raise click.BadParameter(f"'{layer}' not found in source repository", param_hint="SOURCE") from e

    try:
        version_data = json.loads((source_tree / 'meta' / 'version').obj.data)
        version = tuple([int(v) for v in version_data['version'].split('.')])
    except Exception as e:
        raise click.BadParameter("Error getting source repository version", param_hint="SOURCE") from e

    if version >= (0, 2):
        raise click.BadParameter(f"Expecting version <0.2, got {version_data['version']}", param_hint="SOURCE")

    # action!
    print(f"Initialising {dest} ...")
    dest.mkdir()
    dest_repo = pygit2.init_repository(str(dest), bare=True)

    # walk _all_ references, parents before children
    source_walker = source_repo.walk(
        source_repo.head.target,
        pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    )
    for ref in source_repo.listall_reference_objects():
        source_walker.push(ref.resolve().target)

    # source commit hex -> rewritten commit hex
    commit_map = {}

    # Compiled once; they are matched against every tree path of every commit.
    feature_path_re = re.compile(r'^features/[a-f0-9]{4}/([a-f0-9]{8}-(?:[a-f0-9]{4}-){3}[a-f0-9]{12})$')
    feature_container_re = re.compile(r'^features(/[a-f0-9]{4})?$')

    print("\nWriting new commits ...")
    for i, source_commit in enumerate(source_walker):
        dest_parents = []
        for parent_id in source_commit.parent_ids:
            try:
                dest_parents.append(commit_map[parent_id.hex])
            except KeyError:
                raise ValueError(f"Commit {i} ({source_commit.id}): Haven't seen parent ({parent_id})")

        commit_tree = (source_commit.peel(pygit2.Tree) / layer).obj

        sqlite_table_info = json.loads((commit_tree / 'meta' / 'sqlite_table_info').obj.data.decode('utf8'))
        field_cid_map = {r['name']: r['cid'] for r in sqlite_table_info}

        try:
            gpkg_geometry_columns = json.loads((commit_tree / 'meta' / 'gpkg_geometry_columns').obj.data.decode('utf8'))
        except KeyError:
            # No geometry metadata: there is no geometry column at all.
            geom_field = None
        else:
            geom_field = gpkg_geometry_columns['column_name']
        # List of geometry column names; empty (not [[]]) when there is none.
        geom_cols = [geom_field] if geom_field is not None else []

        # Primary key: the first field flagged as pk, otherwise fall back to
        # the first column if it is an INTEGER.
        pk_field = None
        for field in sqlite_table_info:
            if field["pk"]:
                pk_field = field["name"]
                break
        else:
            if sqlite_table_info[0]["type"] == "INTEGER":
                pk_field = sqlite_table_info[0]['name']
            else:
                raise ValueError("No primary key field found")

        if i == 0:
            print(f"  {layer}: Geometry={geom_field} PrimaryKey={pk_field}")

        dataset = Dataset02(None, layer)
        version_blob_data = json.dumps({"version": dataset.VERSION_IMPORT}).encode('utf8')

        feature_count = 0

        index = pygit2.Index()
        for top_tree, top_path, subtree_names, blob_names in walk_tree(commit_tree):
            if top_path == 'meta':
                # copy meta across as-is
                for blob_name in blob_names:
                    if blob_name == 'version':
                        # except version which we update
                        dest_blob = dest_repo.create_blob(version_blob_data)
                    else:
                        source_blob = (top_tree / blob_name).obj
                        dest_blob = dest_repo.create_blob(source_blob.data)

                    index.add(pygit2.IndexEntry(
                        f'{layer}/.sno-table/{top_path}/{blob_name}',
                        dest_blob,
                        pygit2.GIT_FILEMODE_BLOB
                    ))

            elif feature_path_re.match(top_path):
                # feature path: one blob per attribute
                source_feature_dict = {}
                for attr in blob_names:
                    source_blob = (top_tree / attr).obj
                    if attr == geom_field:
                        # geometry is kept as raw bytes
                        source_feature_dict[attr] = source_blob.data
                    else:
                        source_feature_dict[attr] = json.loads(source_blob.data.decode('utf8'))

                dataset.write_feature(
                    source_feature_dict,
                    dest_repo,
                    index,
                    field_cid_map=field_cid_map,
                    geom_cols=geom_cols,
                    primary_key=pk_field,
                )
                feature_count += 1

            elif top_path == '' or feature_container_re.match(top_path):
                # intermediate directories: nothing to copy
                pass
            else:
                raise ValueError(f"Unexpected path: '{top_path}'")

        dest_tree = index.write_tree(dest_repo)
        # NOTE(review): every commit updates "HEAD"; libgit2 presumably rejects
        # this when the first parent is not HEAD's current tip (branched
        # history) - confirm against a multi-branch repository.
        dest_commit = dest_repo.create_commit(
            "HEAD",
            source_commit.author,
            source_commit.committer,
            source_commit.message,
            dest_tree,
            dest_parents,
            # source_commit.message_encoding,
        )
        commit_map[source_commit.hex] = dest_commit.hex

        commit_time = datetime.fromtimestamp(source_commit.commit_time)
        print(f"  {i}: {source_commit.hex[:8]} -> {dest_commit.hex[:8]} ({commit_time}; {source_commit.committer.name}; {feature_count} rows)")

    # One map entry per rewritten commit; unlike the loop index this is also
    # defined when the walker yielded nothing.
    print(f"{len(commit_map)} commits processed.")

    print("\nUpdating references ...")
    for ref in source_repo.listall_reference_objects():
        if ref.type == pygit2.GIT_REF_OID:
            # real references
            target = commit_map[ref.target.hex]
            dest_repo.references.create(ref.name, target, True)  # overwrite
            print(f"  {ref.name} ({ref.target.hex[:8]} -> {target[:8]})")

    # Symbolic references second, so that their targets already exist.
    for ref in source_repo.listall_reference_objects():
        if ref.type == pygit2.GIT_REF_SYMBOLIC:
            dest_repo.references.create(ref.name, ref.target)
            print(f"  {ref.name} -> {ref.target}")

    print("\nGarbage-collecting ...")
    subprocess.check_call(["git", "-C", str(dest), "gc"])

0 comments on commit 8a49e90

Please sign in to comment.