Upload/download files to/from S3 (aws#512)
danielwo authored Jan 18, 2021
1 parent e2d57c5 commit 934601f
Showing 6 changed files with 493 additions and 672 deletions.
4 changes: 4 additions & 0 deletions awswrangler/s3/__init__.py
@@ -3,10 +3,12 @@
from awswrangler.s3._copy import copy_objects, merge_datasets # noqa
from awswrangler.s3._delete import delete_objects # noqa
from awswrangler.s3._describe import describe_objects, get_bucket_region, size_objects # noqa
from awswrangler.s3._download import download # noqa
from awswrangler.s3._list import does_object_exist, list_directories, list_objects # noqa
from awswrangler.s3._read_excel import read_excel # noqa
from awswrangler.s3._read_parquet import read_parquet, read_parquet_metadata, read_parquet_table # noqa
from awswrangler.s3._read_text import read_csv, read_fwf, read_json # noqa
from awswrangler.s3._upload import upload # noqa
from awswrangler.s3._wait import wait_objects_exist, wait_objects_not_exist # noqa
from awswrangler.s3._write_excel import to_excel # noqa
from awswrangler.s3._write_parquet import store_parquet_metadata, to_parquet # noqa
@@ -36,4 +38,6 @@
"to_json",
"to_excel",
"read_excel",
"download",
"upload",
]
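
With these two exports in place, both helpers become available directly under the wr.s3 namespace. A minimal usage sketch (the bucket name and file paths below are illustrative, and assume default AWS credentials are configured):

import awswrangler as wr

# Upload a local file, then fetch it back under a different local name.
wr.s3.upload(local_file="./report.csv", path="s3://my-bucket/report.csv")
wr.s3.download(path="s3://my-bucket/report.csv", local_file="./report_copy.csv")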
76 changes: 76 additions & 0 deletions awswrangler/s3/_download.py
@@ -0,0 +1,76 @@
"""Amazon S3 Download Module (PRIVATE)."""

import logging
from typing import Any, Dict, Optional, Union

import boto3

from awswrangler import _utils
from awswrangler.s3._fs import open_s3_object

_logger: logging.Logger = logging.getLogger(__name__)


def download(
    path: str,
    local_file: Union[str, Any],
    use_threads: bool = True,
    boto3_session: Optional[boto3.Session] = None,
    s3_additional_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
    """Download a file from a received S3 path to a local file.

    Note
    ----
    In case of `use_threads=True` the number of threads
    that will be spawned will be obtained from os.cpu_count().

    Parameters
    ----------
    path : str
        S3 path (e.g. ``s3://bucket/key0``).
    local_file : Union[str, Any]
        A file-like object in binary mode or a path to a local file (e.g. ``./local/path/to/key0``).
    use_threads : bool
        True to enable concurrent requests, False to disable multiple threads.
        If enabled, os.cpu_count() will be used as the max number of threads.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.
    s3_additional_kwargs : Optional[Dict[str, Any]]
        Forwarded to botocore requests; only "SSECustomerAlgorithm" and "SSECustomerKey" arguments will be considered.

    Returns
    -------
    None

    Examples
    --------
    Downloading a file using a path to a local file

    >>> import awswrangler as wr
    >>> wr.s3.download(path='s3://bucket/key', local_file='./key')

    Downloading a file using a file-like object

    >>> import awswrangler as wr
    >>> with open(file='./key', mode='wb') as local_f:
    >>>     wr.s3.download(path='s3://bucket/key', local_file=local_f)

    """
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    _logger.debug("path: %s", path)
    with open_s3_object(
        path=path,
        mode="rb",
        use_threads=use_threads,
        s3_block_size=-1,  # One shot download
        s3_additional_kwargs=s3_additional_kwargs,
        boto3_session=session,
    ) as s3_f:
        if isinstance(local_file, str):
            _logger.debug("Downloading local_file: %s", local_file)
            with open(file=local_file, mode="wb") as local_f:
                local_f.write(s3_f.read())
        else:
            _logger.debug("Downloading file-like object.")
            local_file.write(s3_f.read())
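
The docstring above notes that only "SSECustomerAlgorithm" and "SSECustomerKey" are forwarded from s3_additional_kwargs. A sketch of downloading an SSE-C encrypted object (the key below is a placeholder; a real call needs the same 256-bit key that was supplied at upload time):

import awswrangler as wr

sse_kwargs = {
    "SSECustomerAlgorithm": "AES256",
    "SSECustomerKey": "<32-byte-customer-provided-key>",  # placeholder, not a real key
}
wr.s3.download(
    path="s3://bucket/encrypted-key",
    local_file="./encrypted-key",
    s3_additional_kwargs=sse_kwargs,
)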
76 changes: 76 additions & 0 deletions awswrangler/s3/_upload.py
@@ -0,0 +1,76 @@
"""Amazon S3 Upload Module (PRIVATE)."""

import logging
from typing import Any, Dict, Optional, Union

import boto3

from awswrangler import _utils
from awswrangler.s3._fs import open_s3_object

_logger: logging.Logger = logging.getLogger(__name__)


def upload(
    local_file: Union[str, Any],
    path: str,
    use_threads: bool = True,
    boto3_session: Optional[boto3.Session] = None,
    s3_additional_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
    """Upload a file from a local file to a received S3 path.

    Note
    ----
    In case of `use_threads=True` the number of threads
    that will be spawned will be obtained from os.cpu_count().

    Parameters
    ----------
    local_file : Union[str, Any]
        A file-like object in binary mode or a path to a local file (e.g. ``./local/path/to/key0``).
    path : str
        S3 path (e.g. ``s3://bucket/key0``).
    use_threads : bool
        True to enable concurrent requests, False to disable multiple threads.
        If enabled, os.cpu_count() will be used as the max number of threads.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.
    s3_additional_kwargs : Optional[Dict[str, Any]]
        Forwarded to botocore requests; only "SSECustomerAlgorithm" and "SSECustomerKey" arguments will be considered.

    Returns
    -------
    None

    Examples
    --------
    Uploading a file using a path to a local file

    >>> import awswrangler as wr
    >>> wr.s3.upload(local_file='./key', path='s3://bucket/key')

    Uploading a file using a file-like object

    >>> import awswrangler as wr
    >>> with open(file='./key', mode='rb') as local_f:
    >>>     wr.s3.upload(local_file=local_f, path='s3://bucket/key')

    """
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    _logger.debug("path: %s", path)
    with open_s3_object(
        path=path,
        mode="wb",
        use_threads=use_threads,
        s3_block_size=-1,  # One shot upload
        s3_additional_kwargs=s3_additional_kwargs,
        boto3_session=session,
    ) as s3_f:
        if isinstance(local_file, str):
            _logger.debug("Uploading local_file: %s", local_file)
            with open(file=local_file, mode="rb") as local_f:
                s3_f.write(local_f.read())
        else:
            _logger.debug("Uploading file-like object.")
            s3_f.write(local_file.read())
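
Because the file-like branch simply calls local_file.read(), any binary object with a read() method works as the source, not just an open file handle. For example, uploading straight from an in-memory buffer (bucket name illustrative):

import io

import awswrangler as wr

buf = io.BytesIO(b"hello from memory")  # no file on disk required
wr.s3.upload(local_file=buf, path="s3://bucket/hello.txt")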
2 changes: 2 additions & 0 deletions docs/source/api.rst
@@ -29,6 +29,7 @@ Amazon S3
    delete_objects
    describe_objects
    does_object_exist
    download
    get_bucket_region
    list_directories
    list_objects
@@ -47,6 +48,7 @@ Amazon S3
    to_excel
    to_json
    to_parquet
    upload
    wait_objects_exist
    wait_objects_not_exist

61 changes: 61 additions & 0 deletions tests/test_moto.py
@@ -245,6 +245,67 @@ def test_csv(moto_s3):
    assert len(df.columns) == 10


def test_download_file(moto_s3, tmp_path):
    bucket = "bucket"
    key = "foo.tmp"
    content = b"foo"

    s3_object = moto_s3.Object(bucket, key)
    s3_object.put(Body=content)

    path = "s3://{}/{}".format(bucket, key)
    local_file = tmp_path / key
    wr.s3.download(path=path, local_file=str(local_file))
    assert local_file.read_bytes() == content


def test_download_fileobj(moto_s3, tmp_path):
    bucket = "bucket"
    key = "foo.tmp"
    content = b"foo"

    s3_object = moto_s3.Object(bucket, key)
    s3_object.put(Body=content)

    path = "s3://{}/{}".format(bucket, key)
    local_file = tmp_path / key

    with open(local_file, "wb") as local_f:
        wr.s3.download(path=path, local_file=local_f)
    assert local_file.read_bytes() == content


def test_upload_file(moto_s3, tmp_path):
    bucket = "bucket"
    key = "foo.tmp"
    content = b"foo"

    path = "s3://{}/{}".format(bucket, key)
    local_file = tmp_path / key

    local_file.write_bytes(content)
    wr.s3.upload(local_file=str(local_file), path=path)

    s3_object = moto_s3.Object(bucket, key)
    assert s3_object.get()["Body"].read() == content


def test_upload_fileobj(moto_s3, tmp_path):
    bucket = "bucket"
    key = "foo.tmp"
    content = b"foo"

    path = "s3://{}/{}".format(bucket, key)
    local_file = tmp_path / key

    local_file.write_bytes(content)
    with open(local_file, "rb") as local_f:
        wr.s3.upload(local_file=local_f, path=path)

    s3_object = moto_s3.Object(bucket, key)
    assert s3_object.get()["Body"].read() == content


def test_read_csv_with_chucksize_and_pandas_arguments(moto_s3):
    path = "s3://bucket/test.csv"
    wr.s3.to_csv(df=get_df_csv(), path=path, index=False)
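
The moto_s3 fixture used by these tests is defined elsewhere in the test suite and is not part of this diff. Judging from its usage (moto_s3.Object(bucket, key) against a pre-existing "bucket"), a minimal sketch of such a fixture, assuming moto's mock_s3 context manager, could look like:

import boto3
import pytest
from moto import mock_s3


@pytest.fixture(scope="module")
def moto_s3():
    # Stand up an in-memory S3 and pre-create the bucket the tests expect.
    with mock_s3():
        s3 = boto3.resource("s3", region_name="us-east-1")
        s3.create_bucket(Bucket="bucket")
        yield s3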