Skip to content

Commit

Permalink
add Azure Table Storage
Browse files Browse the repository at this point in the history
  • Loading branch information
JJ11teen committed May 28, 2021
1 parent 0f2aa2a commit 6dad99b
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 22 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pip install cloud-mappings

By default, `cloud-mappings` doesn't install the dependencies required by any of the storage providers. If you would like to install them alongside `cloud-mappings` you may run any combination of:
```
pip install cloud-mappings[azure,gcp,aws]
pip install cloud-mappings[azureblob,azuretable,gcpstorage,awss3]
```

## Instantiation
Expand Down
9 changes: 5 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[metadata]
# replace with your username:
name = cloud-mappings
version = 0.7.6
version = 0.8.0
author = Lucas Sargent
author_email = lucas.sargent@eliiza.com.au
description = MutableMapping interfaces for common cloud storage providers
Expand All @@ -24,9 +24,10 @@ install_requires =
zict>=2.0

[options.extras_require]
azure = azure-identity==1.6.0; azure-storage-blob==12.8.1
aws = boto3==1.17.73
gcp = google-cloud-storage==1.38.0
azureblob = azure-identity==1.6.0; azure-storage-blob==12.8.1
azuretable = azure-identity==1.6.0; azure-data-tables==12.0.0b7
gcpstorage = google-cloud-storage==1.38.0
awss3 = boto3==1.17.73
tests = pytest==6.2.4

[options.packages.find]
Expand Down
3 changes: 2 additions & 1 deletion src/cloudmappings/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .wrappers import AzureBlobMapping, GoogleCloudStorageMapping, AWSS3Mapping
from .wrappers import AzureBlobMapping, AzureTableMapping, GoogleCloudStorageMapping, AWSS3Mapping

# Public API of the cloudmappings package: one MutableMapping wrapper
# per supported cloud storage backend, re-exported from .wrappers.
__all__ = [
    "AzureBlobMapping",
    "AzureTableMapping",
    "GoogleCloudStorageMapping",
    "AWSS3Mapping",
]
129 changes: 129 additions & 0 deletions src/cloudmappings/storageproviders/azuretablestorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import logging
from typing import Dict
from urllib.parse import quote, unquote

from azure.core import MatchConditions
from azure.core.exceptions import ResourceExistsError, HttpResponseError
from azure.data.tables import TableClient, UpdateMode

from .storageprovider import StorageProvider


def _chunk_bytes(data: bytes) -> Dict[str, bytes]:
# Max property size in azure tables is 64KiB
max_property_size = 64 * 1024
return {f"d_{k}": data[i : i + max_property_size] for k, i in enumerate(range(0, len(data), max_property_size))}


def _dechunk_entity(entity: Dict[str, bytes]) -> bytes:
return b"".join([v.value for k, v in entity.items() if k.startswith("d_")])


class AzureTableStorageProvider(StorageProvider):
    """StorageProvider backed by a single Azure Table.

    Each mapping key becomes an entity PartitionKey (with a fixed RowKey of
    "cm"), and the value's bytes are spread over ``d_*`` properties to stay
    under the per-property size limit. Entity etags provide the optimistic
    concurrency checks required by the StorageProvider contract.
    """

    def __init__(
        self,
        table_name: str,
        connection_string: str = None,
        endpoint: str = None,
        credential=None,
    ) -> None:
        """Create a provider for *table_name*.

        Exactly one of *connection_string* or *endpoint* must be given;
        *credential* is only used with *endpoint*.

        Raises:
            ValueError: if neither connection_string nor endpoint is supplied.
        """
        if endpoint is None and connection_string is None:
            raise ValueError("One of endpoint or connection_string must be supplied")
        if connection_string is not None:
            self._table_client = TableClient.from_connection_string(conn_str=connection_string, table_name=table_name)
        else:
            self._table_client = TableClient(
                endpoint=endpoint,
                table_name=table_name,
                credential=credential,
            )

    def encode_key(self, unsafe_key) -> str:
        # Percent-encode everything (safe="") so arbitrary strings become
        # valid PartitionKey values; strict errors reject unencodable input.
        return quote(unsafe_key, safe="", errors="strict")

    def decode_key(self, encoded_key) -> str:
        # Inverse of encode_key.
        return unquote(encoded_key, errors="strict")

    def logical_name(self) -> str:
        # Stable identifier for this backing store (account + table).
        return (
            "CloudStorageProvider=AzureTableStorage,"
            f"StorageAccountName={self._table_client.account_name},"
            f"TableName={self._table_client.table_name}"
        )

    def create_if_not_exists(self):
        """Create the table if needed.

        Returns True if the table already existed, False if it was created
        by this call.
        """
        try:
            self._table_client.create_table()
        except ResourceExistsError:
            return True
        return False

    def download_data(self, key: str, etag: str) -> bytes:
        """Fetch and reassemble the bytes stored under *key*.

        If *etag* is given and does not match the entity's current etag,
        raise_key_sync_error is invoked (inherited from StorageProvider).
        """
        entity = self._table_client.get_entity(
            partition_key=key,
            row_key="cm",
        )
        if etag is not None and etag != entity.metadata["etag"]:
            self.raise_key_sync_error(key=key, etag=etag)
        return _dechunk_entity(entity)

    def upload_data(self, key: str, etag: str, data: bytes) -> str:
        """Write *data* under *key* and return the new etag.

        etag is None  -> expect the key to be new (create_entity).
        etag not None -> expect it to match the stored entity (conditional
        replace). Mismatches surface via raise_key_sync_error; oversized
        values via raise_value_size_error.
        """
        entity = {
            "PartitionKey": key,
            "RowKey": "cm",
            **_chunk_bytes(data=data),
        }
        try:
            if etag is None:  # Not expecting existing data
                response = self._table_client.create_entity(entity=entity)
            else:
                response = self._table_client.update_entity(
                    entity=entity,
                    mode=UpdateMode.REPLACE,
                    etag=etag,
                    match_condition=MatchConditions.IfNotModified,
                )
        except ResourceExistsError as e:
            # create_entity found an existing entity: caller's view is stale.
            self.raise_key_sync_error(key=key, etag=etag, inner_exception=e)
        except HttpResponseError as e:
            # The SDK reports a failed etag precondition as a generic
            # HttpResponseError, so we match on the message text.
            # NOTE(review): message matching is fragile across SDK versions —
            # confirm against the pinned azure-data-tables release.
            if "update condition specified in the request was not satisfied" in e.exc_msg or (
                "etag value" in e.exc_msg and "is not valid" in e.exc_msg
            ):
                self.raise_key_sync_error(key=key, etag=etag, inner_exception=e)
            elif (
                e.model is not None
                and e.model.additional_properties is not None
                and "odata.error" in e.model.additional_properties
                and "code" in e.model.additional_properties["odata.error"]
                and e.model.additional_properties["odata.error"]["code"] == "EntityTooLarge"
            ):
                # Entity exceeded the service's size limit.
                self.raise_value_size_error(key=key, inner_exception=e)
            else:
                raise e
        return response["etag"]

    def delete_data(self, key: str, etag: str) -> None:
        """Delete the entity for *key*, conditional on *etag* matching."""
        try:
            self._table_client.delete_entity(
                partition_key=key,
                row_key="cm",
                etag=etag,
                match_condition=MatchConditions.IfNotModified,
            )
        except HttpResponseError as e:
            # Same message-based etag-mismatch detection as upload_data.
            if "update condition specified in the request was not satisfied" in e.exc_msg or (
                "etag value" in e.exc_msg and "is not valid" in e.exc_msg
            ):
                self.raise_key_sync_error(key=key, etag=etag, inner_exception=e)
            else:
                raise e

    def list_keys_and_etags(self, key_prefix: str) -> Dict[str, str]:
        """Map each stored key (PartitionKey) to its current etag.

        A None *key_prefix* lists everything; otherwise a half-open range
        query [key_prefix, key_prefix_stop) emulates prefix matching, where
        key_prefix_stop is the prefix with its last character incremented.
        NOTE(review): the prefix is interpolated into the OData filter;
        safe only because encode_key percent-encodes quotes — confirm all
        callers pass encoded keys.
        """
        if key_prefix is None:
            query = self._table_client.list_entities()
        else:
            key_prefix_stop = key_prefix[:-1] + chr(ord(key_prefix[-1]) + 1)
            query = self._table_client.query_entities(
                f"PartitionKey ge '{key_prefix}' and PartitionKey lt '{key_prefix_stop}'"
            )
        return {e["PartitionKey"]: e.metadata["etag"] for e in query}
7 changes: 7 additions & 0 deletions src/cloudmappings/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ def __init__(self, *args, **kwargs) -> None:
super().__init__(storage_provider=AzureBlobStorageProvider(*args, **kwargs))


class AzureTableMapping(CloudMapping):
    """CloudMapping backed by Azure Table Storage."""

    def __init__(self, *args, **kwargs) -> None:
        # Imported lazily so the azure dependency stays optional unless
        # this mapping is actually instantiated.
        from .storageproviders.azuretablestorage import AzureTableStorageProvider

        provider = AzureTableStorageProvider(*args, **kwargs)
        super().__init__(storage_provider=provider)


class GoogleCloudStorageMapping(CloudMapping):
def __init__(self, *args, **kwargs) -> None:
from .storageproviders.googlecloudstorage import GoogleCloudStorageProvider
Expand Down
45 changes: 29 additions & 16 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,35 @@
import os
from uuid import uuid4
import logging

import pytest

from cloudmappings.storageproviders.azureblobstorage import AzureBlobStorageProvider
from cloudmappings.storageproviders.azuretablestorage import AzureTableStorageProvider
from cloudmappings.storageproviders.googlecloudstorage import GoogleCloudStorageProvider
from cloudmappings.storageproviders.awss3 import AWSS3Provider


def pytest_addoption(parser):
parser.addoption(
"--azure_storage_account_url",
"--azure_blob_storage_account_url",
action="store",
help="Azure Storage Account URL",
help="Azure Blob Storage Account URL",
)
parser.addoption(
"--gcp_project",
"--azure_table",
action="store_true",
help="Run Azure Table Storage Tests",
)
parser.addoption(
"--gcp_storage_project",
action="store",
help="GCP Project Id",
)
parser.addoption(
"--aws",
"--aws_s3",
action="store_true",
help="Run AWS Tests",
help="Run AWS S3 Tests",
)
parser.addoption(
"--test_container_id",
Expand All @@ -37,26 +44,32 @@ def pytest_addoption(parser):


def pytest_configure(config):
test_container_name = f"pytest-{config.getoption('test_container_id')}"
test_container_name = f"pytest{config.getoption('test_container_id')}"
logging.warning(f"Using cloud containers with the name: {test_container_name}")
logging.warning(f"Using keys with the prefix: {test_run_id}")

azure_storage_account_url = config.getoption("azure_storage_account_url")
if azure_storage_account_url is not None:
storage_providers["azure"] = AzureBlobStorageProvider(
account_url=azure_storage_account_url,
azure_blob_storage_account_url = config.getoption("azure_blob_storage_account_url")
if azure_blob_storage_account_url is not None:
storage_providers["azure_blob"] = AzureBlobStorageProvider(
account_url=azure_blob_storage_account_url,
container_name=test_container_name,
)

gcp_project = config.getoption("gcp_project")
if gcp_project is not None:
storage_providers["gcp"] = GoogleCloudStorageProvider(
project=gcp_project,
if config.getoption("azure_table") is not None:
storage_providers["azure_table"] = AzureTableStorageProvider(
connection_string=os.environ["AZURE_TABLE_STORAGE_CONNECTION_STRING"],
table_name=test_container_name,
)

gcp_storage_project = config.getoption("gcp_storage_project")
if gcp_storage_project is not None:
storage_providers["gcp_storage"] = GoogleCloudStorageProvider(
project=gcp_storage_project,
bucket_name=test_container_name,
)

if config.getoption("aws"):
storage_providers["aws"] = AWSS3Provider(
if config.getoption("aws_s3"):
storage_providers["aws_s3"] = AWSS3Provider(
bucket_name=test_container_name,
)

Expand Down

0 comments on commit 6dad99b

Please sign in to comment.