Skip to content

Commit

Permalink
move key encoding to provider, update tests to reflect, standardise s…
Browse files Browse the repository at this point in the history
…torage_provider naming
  • Loading branch information
JJ11teen committed May 26, 2021
1 parent 4cfa586 commit fd61dc6
Show file tree
Hide file tree
Showing 11 changed files with 120 additions and 93 deletions.
39 changes: 18 additions & 21 deletions src/cloudmappings/cloudstoragemapping.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,34 @@
from functools import partial
from typing import MutableMapping, Dict
from urllib.parse import quote, unquote

from .storageproviders.storageprovider import StorageProvider


def _safe_key(key: str) -> str:
    """Percent-encode a mapping key so it can be used as a storage key.

    :param key: The user-facing key; must be a ``str``.
    :return: ``key`` passed through ``urllib.parse.quote`` with its default arguments.
    :raises TypeError: If ``key`` is not a ``str``.
    """
    if not isinstance(key, str):
        # Build one formatted message. Passing the key as a second positional
        # argument would make str(exc) render as a tuple of both arguments.
        raise TypeError(f"Key must be of type 'str'. Got key: {key!r}")
    return quote(key)


def _unsafe_key(key: str) -> str:
    """Reverse the percent-encoding of a stored key.

    :param key: A percent-encoded key.
    :return: ``key`` decoded by ``urllib.parse.unquote`` with its default arguments.
    """
    decoded = unquote(key)
    return decoded


class CloudMapping(MutableMapping):
_etags: Dict[str, str]

def __init__(
self,
storageprovider: StorageProvider,
storage_provider: StorageProvider,
sync_initially: bool = True,
) -> None:
self._storageprovider = storageprovider
self._storage_provider = storage_provider
self._etags = {}
if self._storageprovider.create_if_not_exists() and sync_initially:
if self._storage_provider.create_if_not_exists() and sync_initially:
self.sync_with_cloud()

def _encode_key(self, unsafe_key: str) -> str:
    """Validate a mapping key and encode it with the storage provider.

    :param unsafe_key: The user-facing key; must be a ``str``.
    :return: The value returned by ``self._storage_provider.encode_key`` for the key.
    :raises TypeError: If ``unsafe_key`` is not a ``str``.
    """
    if not isinstance(unsafe_key, str):
        # Build one formatted message. Passing the key as a second positional
        # argument would make str(exc) render as a tuple of both arguments.
        raise TypeError(f"Key must be of type 'str'. Got key: {unsafe_key!r}")
    return self._storage_provider.encode_key(unsafe_key=unsafe_key)

def sync_with_cloud(self, key: str = None) -> None:
prefix_key = _safe_key(key) if key is not None else None
prefix_key = self._encode_key(key) if key is not None else None
self._etags.update(
{_unsafe_key(k): i for k, i in self._storageprovider.list_keys_and_etags(prefix_key).items()}
{
self._storage_provider.decode_key(k): i
for k, i in self._storage_provider.list_keys_and_etags(prefix_key).items()
}
)

@property
Expand All @@ -41,21 +38,21 @@ def etags(self):
def __getitem__(self, key: str) -> bytes:
if key not in self._etags:
raise KeyError(key)
return self._storageprovider.download_data(key=_safe_key(key), etag=self._etags[key])
return self._storage_provider.download_data(key=self._encode_key(key), etag=self._etags[key])

def __setitem__(self, key: str, value: bytes) -> None:
if not isinstance(value, bytes):
raise ValueError("Value must be bytes like")
self._etags[key] = self._storageprovider.upload_data(
key=_safe_key(key),
self._etags[key] = self._storage_provider.upload_data(
key=self._encode_key(key),
etag=self._etags.get(key, None),
data=value,
)

def __delitem__(self, key: str) -> None:
if key not in self._etags:
raise KeyError(key)
self._storageprovider.delete_data(key=_safe_key(key), etag=self._etags[key])
self._storage_provider.delete_data(key=self._encode_key(key), etag=self._etags[key])
del self._etags[key]

def __contains__(self, key: str) -> bool:
Expand All @@ -70,7 +67,7 @@ def __len__(self) -> int:
return len(self._etags)

def __repr__(self) -> str:
return f"cloudmapping<{self._storageprovider.safe_name()}>"
return f"cloudmapping<{self._storage_provider.logical_name()}>"

@classmethod
def with_buffers(cls, input_buffers, output_buffers, *args, **kwargs) -> "CloudMapping":
Expand Down
8 changes: 4 additions & 4 deletions src/cloudmappings/errors.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
class KeySyncError(ValueError):
def __init__(self, storageprovider_safe_name: str, key: str, etag: str) -> None:
def __init__(self, storage_provider_name: str, key: str, etag: str) -> None:
super().__init__(
f"Mapping is out of sync with cloud data.\n"
f"Cloud storage: '{storageprovider_safe_name}'\n"
f"Cloud storage: '{storage_provider_name}'\n"
f"Key: '{key}', etag: '{etag}'"
)


class ValueSizeError(ValueError):
    """Raised when a value is too big to fit in the cloud storage.

    :param storage_provider_name: Human readable name of the storage provider.
    :param key: Key the value was being stored under.
    :param size: Size of the rejected value.
    """

    def __init__(self, storage_provider_name: str, key: str, size: int) -> None:
        super().__init__(
            # The first sentence ends with a newline so the provider details
            # start on their own line instead of running into the sentence.
            f"Value is too big to fit in cloud.\n"
            f"Cloud storage: '{storage_provider_name}'\n"
            f"Key: '{key}', size: '{size}'"
        )
2 changes: 1 addition & 1 deletion src/cloudmappings/storageproviders/awss3.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(
),
)

def safe_name(self) -> str:
def logical_name(self) -> str:
return "CloudStorageProvider=AWSS3," f"BucketName={self._bucket_name}"

def create_if_not_exists(self):
Expand Down
6 changes: 2 additions & 4 deletions src/cloudmappings/storageproviders/azureblobstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(
)
self._create_container_metadata = create_container_metadata

def safe_name(self) -> str:
def logical_name(self) -> str:
return (
"CloudStorageProvider=AzureBlobStorage,"
f"StorageAccountName={self._container_client.account_name},"
Expand Down Expand Up @@ -60,9 +60,7 @@ def upload_data(self, key: str, etag: str, data: bytes) -> str:
data=data,
**args,
)
except ResourceModifiedError:
self.raise_key_sync_error(key=key, etag=etag)
except ResourceExistsError:
except (ResourceExistsError, ResourceModifiedError):
self.raise_key_sync_error(key=key, etag=etag)
return json.loads(response["etag"])

Expand Down
2 changes: 1 addition & 1 deletion src/cloudmappings/storageproviders/googlecloudstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(
bucket_name=bucket_name,
)

def safe_name(self) -> str:
def logical_name(self) -> str:
return (
"CloudStorageProvider=GoogleCloudStorage,"
f"Project={self._client.project},"
Expand Down
13 changes: 10 additions & 3 deletions src/cloudmappings/storageproviders/storageprovider.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
from abc import ABC, abstractmethod
from typing import Dict
from urllib.parse import quote, unquote

from ..errors import KeySyncError, ValueSizeError


class StorageProvider(ABC):
def raise_key_sync_error(self, key: str, etag: str):
raise KeySyncError(storageprovider_safe_name=self.safe_name(), key=key, etag=etag)
raise KeySyncError(storage_provider_name=self.logical_name(), key=key, etag=etag)

def raise_value_size_error(self, key: str, size: int):
raise ValueSizeError(storageprovider_safe_name=self.safe_name(), key=key, size=size)
raise ValueSizeError(storage_provider_name=self.logical_name(), key=key, size=size)

def encode_key(self, unsafe_key) -> str:
    """Percent-encode a key for use with the storage backend.

    :param unsafe_key: Key to encode.
    :return: ``unsafe_key`` encoded by ``urllib.parse.quote`` with ``errors="strict"``.
    """
    encoded = quote(unsafe_key, errors="strict")
    return encoded

def decode_key(self, encoded_key) -> str:
    """Decode a percent-encoded key back to its original form.

    :param encoded_key: Key to decode.
    :return: ``encoded_key`` decoded by ``urllib.parse.unquote`` with ``errors="strict"``.
    """
    decoded = unquote(encoded_key, errors="strict")
    return decoded

@abstractmethod
def safe_name(self) -> str:
def logical_name(self) -> str:
"""Returns a human readable string identifying the current implementation, and which logical cloud resource it is currently mapping to. Does not include any credential information.
:return: String with identity information
"""
Expand Down
6 changes: 3 additions & 3 deletions src/cloudmappings/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@ class AzureBlobMapping(CloudMapping):
def __init__(self, *args, **kwargs) -> None:
from .storageproviders.azureblobstorage import AzureBlobStorageProvider

super().__init__(storageprovider=AzureBlobStorageProvider(*args, **kwargs))
super().__init__(storage_provider=AzureBlobStorageProvider(*args, **kwargs))


class GoogleCloudStorageMapping(CloudMapping):
def __init__(self, *args, **kwargs) -> None:
from .storageproviders.googlecloudstorage import GoogleCloudStorageProvider

super().__init__(storageprovider=GoogleCloudStorageProvider(*args, **kwargs))
super().__init__(storage_provider=GoogleCloudStorageProvider(*args, **kwargs))


class AWSS3Mapping(CloudMapping):
def __init__(self, *args, **kwargs) -> None:
from .storageproviders.awss3 import AWSS3Provider

super().__init__(storageprovider=AWSS3Provider(*args, **kwargs))
super().__init__(storage_provider=AWSS3Provider(*args, **kwargs))
57 changes: 36 additions & 21 deletions tests/tests/1_storageproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,59 +13,74 @@ def test_create_if_not_exists(self, storage_provider: StorageProvider):
assert storage_provider.create_if_not_exists() == False
assert storage_provider.create_if_not_exists() == True

def test_keys_are_encoded_decoded(self, storage_provider: StorageProvider):
    """Check that decoding an encoded key gives back the original key."""
    sample_keys = (
        "simplekey0",
        "/here/are/forward/slashes",
        "/how.about_some ˆøœ¨åß∆∫ı˜unusual!@#$%^*characters",
    )
    # Same order as the individual assertions: plain, slashes, unusual characters.
    for original_key in sample_keys:
        encoded = storage_provider.encode_key(original_key)
        assert original_key == storage_provider.decode_key(encoded)

def test_data_is_stored(self, storage_provider: StorageProvider, test_id: str):
key = test_id + "-data-store-test"
encoded_key = storage_provider.encode_key(key)

etag = storage_provider.upload_data(key, None, b"data")
data = storage_provider.download_data(key, etag)
etag = storage_provider.upload_data(encoded_key, None, b"data")
data = storage_provider.download_data(encoded_key, etag)

assert data == b"data"

def test_keys_and_etags_are_listed(self, storage_provider: StorageProvider, test_id: str):
key_1 = test_id + "-keys-and-etags-list-test-1"
key_2 = test_id + "-keys-and-etags-list-test-2"
encoded_key_1 = storage_provider.encode_key(key_1)
encoded_key_2 = storage_provider.encode_key(key_2)

etag_1 = storage_provider.upload_data(key_1, None, b"data")
etag_2 = storage_provider.upload_data(key_2, None, b"data")
etag_1 = storage_provider.upload_data(encoded_key_1, None, b"data")
etag_2 = storage_provider.upload_data(encoded_key_2, None, b"data")
keys_and_etags = storage_provider.list_keys_and_etags(None)

assert key_1 in keys_and_etags
assert key_2 in keys_and_etags
assert etag_1 == keys_and_etags[key_1]
assert etag_2 == keys_and_etags[key_2]
assert encoded_key_1 in keys_and_etags
assert encoded_key_2 in keys_and_etags
assert etag_1 == keys_and_etags[encoded_key_1]
assert etag_2 == keys_and_etags[encoded_key_2]

for key, etag in keys_and_etags.items():
assert etag is not None, (key, etag)
for encoded_key, etag in keys_and_etags.items():
assert etag is not None, (encoded_key, etag)

def test_keys_are_deleted(self, storage_provider: StorageProvider, test_id: str):
key = test_id + "-keys-deleted-test"
encoded_key = storage_provider.encode_key(key)

etag = storage_provider.upload_data(key, None, b"data")
storage_provider.delete_data(key, etag)
etag = storage_provider.upload_data(encoded_key, None, b"data")
storage_provider.delete_data(encoded_key, etag)

cloud_key_list = storage_provider.list_keys_and_etags(key)
assert key not in cloud_key_list
cloud_key_list = storage_provider.list_keys_and_etags(encoded_key)
assert encoded_key not in cloud_key_list

def test_etags_are_enforced(self, storage_provider: StorageProvider, test_id: str):
key = test_id + "etags-enforced-test"
encoded_key = storage_provider.encode_key(key)

with pytest.raises(KeySyncError):
storage_provider.upload_data(key, "etag-when-none-existing", b"data")
storage_provider.upload_data(encoded_key, "etag-when-none-existing", b"data")

good_etag = storage_provider.upload_data(key, None, b"0")
good_etag = storage_provider.upload_data(encoded_key, None, b"0")
assert good_etag is not None

with pytest.raises(KeySyncError):
storage_provider.download_data(key, "bad-etag")
storage_provider.download_data(encoded_key, "bad-etag")
with pytest.raises(KeySyncError):
storage_provider.upload_data(key, "bad-etag", b"data")
storage_provider.upload_data(encoded_key, "bad-etag", b"data")
with pytest.raises(KeySyncError):
storage_provider.delete_data(key, "bad-etag")
storage_provider.delete_data(encoded_key, "bad-etag")

def test_etags_change_with_same_data(self, storage_provider: StorageProvider, test_id: str):
key = test_id + "etags-unique-same-data-test"
encoded_key = storage_provider.encode_key(key)

first_etag = storage_provider.upload_data(key, None, b"static-data")
second_etag = storage_provider.upload_data(key, first_etag, b"static-data")
first_etag = storage_provider.upload_data(encoded_key, None, b"static-data")
second_etag = storage_provider.upload_data(encoded_key, first_etag, b"static-data")

assert first_etag != second_etag
41 changes: 24 additions & 17 deletions tests/tests/2_singlecloudmapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,34 @@

class SingleCloudMappingTests:
def test_initialising_without_sync(self, storage_provider: StorageProvider):
CloudMapping(storageprovider=storage_provider, sync_initially=False)
CloudMapping(storage_provider=storage_provider, sync_initially=False)

def test_initialising_with_sync(self, storage_provider: StorageProvider):
CloudMapping(storageprovider=storage_provider, sync_initially=True)
CloudMapping(storage_provider=storage_provider, sync_initially=True)

def test_repr(self, storage_provider: StorageProvider):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)

_repr = str(cm)

assert "CloudStorageProvider=" in _repr

if "Azure" in _repr:
if "AzureBlob" in _repr:
assert "StorageAccountName=" in _repr
assert "ContainerName=" in _repr
elif "Google" in _repr:
elif "AzureTable" in _repr:
assert "StorageAccountName=" in _repr
assert "TableName=" in _repr
elif "GoogleCloudStorage" in _repr:
assert "Project=" in _repr
assert "BucketName=" in _repr
elif "AWS" in _repr:
elif "AWSS3" in _repr:
assert "BucketName=" in _repr
else:
pytest.fail("Unknown provider repr")

def test_non_byte_values_error(self, storage_provider: StorageProvider, test_id: str):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)
key = test_id + "non-bytes-error"

with pytest.raises(ValueError, match="must be bytes like"):
Expand All @@ -43,7 +48,7 @@ def test_non_byte_values_error(self, storage_provider: StorageProvider, test_id:
cm[key] = {"or": "something more", "elaborate": True}

def test_no_key_errors(self, storage_provider: StorageProvider, test_id: str):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)
key = test_id + "/no-key-errors-test"

with pytest.raises(KeyError):
Expand All @@ -53,7 +58,7 @@ def test_no_key_errors(self, storage_provider: StorageProvider, test_id: str):
assert key not in cm

def test_basic_setting_and_getting(self, storage_provider: StorageProvider, test_id: str):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)

cm[test_id + "-key-A"] = b"100"
cm[test_id + "-key-a"] = b"uncapitalised"
Expand All @@ -64,16 +69,18 @@ def test_basic_setting_and_getting(self, storage_provider: StorageProvider, test
assert cm[test_id + "-key-3"] == b"three"

def test_complex_keys(self, storage_provider: StorageProvider, test_id: str):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)
key1 = test_id + "/here/are/some/sub/dirs"
key2 = test_id + "/how.about_some ˆøœ¨åß∆∫ı˜unusual!@#$%^*characters"

cm[test_id + "/here/are/some/sub/dirs"] = b"0"
cm[test_id + "/howaboutsome ˆøœ¨åß∆∫ı˜ unusual !@#$%^* characters"] = b"1"
cm[key1] = b"0"
cm[key2] = b"1"

assert cm[test_id + "/here/are/some/sub/dirs"] == b"0"
assert cm[test_id + "/howaboutsome ˆøœ¨åß∆∫ı˜ unusual !@#$%^* characters"] == b"1"
assert cm[key1] == b"0"
assert cm[key2] == b"1"

def test_deleting_keys(self, storage_provider: StorageProvider, test_id: str):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)
key = test_id + "/delete-test"

cm[key] = b"0"
Expand All @@ -82,7 +89,7 @@ def test_deleting_keys(self, storage_provider: StorageProvider, test_id: str):
cm[key]

def test_contains(self, storage_provider: StorageProvider, test_id: str):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)
key = test_id + "/contains-test"

assert key not in cm
Expand All @@ -91,7 +98,7 @@ def test_contains(self, storage_provider: StorageProvider, test_id: str):
assert key in cm

def test_length(self, storage_provider: StorageProvider, test_id: str):
cm = CloudMapping(storageprovider=storage_provider, sync_initially=False)
cm = CloudMapping(storage_provider=storage_provider, sync_initially=False)
key_1 = test_id + "/length-test/1"
key_2 = test_id + "/length-test/2"
key_3 = test_id + "/length-test/3"
Expand Down
Loading

0 comments on commit fd61dc6

Please sign in to comment.