Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more dynamic s3 parameters #506

Merged
merged 1 commit into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
more dynamic s3 parameters
  • Loading branch information
iu-dpaul committed Jun 6, 2024
commit cb9e46f88fd01571c6d23a39b60111aa0cd189d1
4 changes: 4 additions & 0 deletions services/engine/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,8 @@ S3_AWS_ACCESS_KEY_ID=
S3_AWS_SECRET_ACCESS_KEY=
ONLY_STORE_CSV_FILES_LOCALLY = False # Set to True if you only want to save generated CSV files locally instead of in S3. Note that files stored locally should be treated as ephemeral, i.e., they will disappear when the engine is restarted.

# Used for the s3 alternative minio (see docker-compose.yml)
MINIO_ROOT_USER=
MINIO_ROOT_PASSWORD=

CORE_PORT = 80 # This env var defines the port that will be exposed by the container. It serves as the configuration for both the internal and external container ports.
3 changes: 3 additions & 0 deletions services/engine/dataherald/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class Settings(BaseSettings):
db_uri: str | None = os.environ.get("MONGODB_URI")
openai_api_key: str | None = os.environ.get("OPENAI_API_KEY")
encrypt_key: str = os.environ.get("ENCRYPT_KEY")
s3_custom_endpoint: str | None = os.environ.get("S3_CUSTOM_ENDPOINT")
s3_bucket_name: str = os.environ.get("S3_BUCKET_NAME", "k2-core")
s3_region: str | None = os.environ.get("S3_REGION", "us-east-1")
s3_aws_access_key_id: str | None = os.environ.get("S3_AWS_ACCESS_KEY_ID")
s3_aws_secret_access_key: str | None = os.environ.get("S3_AWS_SECRET_ACCESS_KEY")
# Needed for Azure OpenAI integration:
Expand Down
60 changes: 35 additions & 25 deletions services/engine/dataherald/utils/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,44 @@ class S3:
def __init__(self):
    """Instantiate ``Settings`` and keep it on the instance as ``self.settings``."""
    self.settings = Settings()

def _get_client(
    self,
    access_key: str | None = None,
    secret_access_key: str | None = None,
    region: str | None = None,
) -> boto3.client:
    """Build a boto3 S3 client from explicit credentials or the settings.

    Each argument that is falsy (``None`` or an empty string) falls back to
    the corresponding ``s3_*`` value on ``self.settings``.

    Args:
        access_key: Access key id; falls back to
            ``settings.s3_aws_access_key_id``.
        secret_access_key: Secret access key; falls back to
            ``settings.s3_aws_secret_access_key``.
        region: Region name; falls back to ``settings.s3_region``.

    Returns:
        The object returned by ``boto3.client("s3", ...)``.
    """
    client_kwargs = {
        "aws_access_key_id": access_key or self.settings.s3_aws_access_key_id,
        "aws_secret_access_key": (
            secret_access_key or self.settings.s3_aws_secret_access_key
        ),
        "region_name": region or self.settings.s3_region,
    }

    # A custom endpoint (e.g. a MinIO instance) is only passed when it is
    # configured; otherwise boto3 resolves the endpoint itself.
    if self.settings.s3_custom_endpoint:
        client_kwargs["endpoint_url"] = self.settings.s3_custom_endpoint
        client_kwargs["aws_session_token"] = None

    return boto3.client("s3", **client_kwargs)

def upload(self, file_location, file_storage: FileStorage | None = None) -> str:
file_name = file_location.split("/")[-1]
bucket_name = "k2-core"
bucket_name = self.settings.s3_bucket_name

# Upload the file
if file_storage:
fernet_encrypt = FernetEncrypt()
bucket_name = file_storage.bucket
s3_client = boto3.client(
"s3",
aws_access_key_id=fernet_encrypt.decrypt(file_storage.access_key_id),
aws_secret_access_key=fernet_encrypt.decrypt(
file_storage.secret_access_key
),
region_name=file_storage.region,
s3_client = self._get_client(
access_key=fernet_encrypt.decrypt(file_storage.access_key_id),
secret_access_key=fernet_encrypt.decrypt(file_storage.secret_access_key),
region=file_storage.region,
)
else:
s3_client = boto3.client(
"s3",
aws_access_key_id=self.settings.s3_aws_access_key_id,
aws_secret_access_key=self.settings.s3_aws_secret_access_key,
)
s3_client = self._get_client()

s3_client.upload_file(
file_location, bucket_name, os.path.basename(file_location)
)
Expand All @@ -45,20 +61,14 @@ def download(self, path: str, file_storage: FileStorage | None = None) -> str:
path = path.split("/")
if file_storage:
fernet_encrypt = FernetEncrypt()
s3_client = boto3.client(
"s3",
aws_access_key_id=fernet_encrypt.decrypt(file_storage.access_key_id),
aws_secret_access_key=fernet_encrypt.decrypt(
file_storage.secret_access_key
),
region_name=file_storage.region,
s3_client = self._get_client(
access_key=fernet_encrypt.decrypt(file_storage.access_key_id),
secret_access_key=fernet_encrypt.decrypt(file_storage.secret_access_key),
region=file_storage.region,
)
else:
s3_client = boto3.client(
"s3",
aws_access_key_id=self.settings.s3_aws_access_key_id,
aws_secret_access_key=self.settings.s3_aws_secret_access_key,
)
s3_client = self._get_client()

file_location = f"tmp/{path[-1]}"
s3_path = path[-1]
if len(s3_path[3:]) > 1:
Expand Down
16 changes: 16 additions & 0 deletions services/engine/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@ services:
MONGO_INITDB_DATABASE: "${MONGODB_DB_NAME}"
networks:
- dataherald_network
# Uncomment the block below to use MinIO, an S3-compatible alternative, instead of AWS S3.
# You need to set up a bucket in the MinIO web interface (http://localhost:9001).
# minio:
# container_name: s3
# image: minio/minio
# networks:
# - dataherald_network
# ports:
# - "9000:9000"
# - "9001:9001"
# volumes:
# - ./s3data:/data
# environment:
# MINIO_ROOT_USER: "${MINIO_ROOT_USER:-dataherald}"
# MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD:-dataherald}"
# command: server --console-address ":9001" /data
networks:
dataherald_network:
external: true
21 changes: 16 additions & 5 deletions services/enterprise/utils/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,22 @@ def upload(self, file: UploadFile) -> str:
def _upload_file(self, file_location: str, file_name: str) -> str:
bucket_name = aws_s3_settings.s3_bucket_name
# Upload the file
s3_client = boto3.client(
"s3",
aws_access_key_id=aws_s3_settings.s3_aws_access_key_id,
aws_secret_access_key=aws_s3_settings.s3_aws_secret_access_key,
)
s3_client: boto3.client = None
if aws_s3_settings.s3_custom_endpoint:
s3_client = boto3.client(
"s3",
aws_access_key_id=aws_s3_settings.s3_aws_access_key_id,
aws_secret_access_key=aws_s3_settings.s3_aws_secret_access_key,
endpoint_url=aws_s3_settings.s3_custom_endpoint,
aws_session_token=None,
)
else:
s3_client = boto3.client(
"s3",
aws_access_key_id=aws_s3_settings.s3_aws_access_key_id,
aws_secret_access_key=aws_s3_settings.s3_aws_secret_access_key,
)

s3_client.upload_file(
file_location, bucket_name, os.path.basename(file_location)
)
Expand Down