Skip to content

Commit

Permalink
linting
Browse files Browse the repository at this point in the history
  • Loading branch information
brimoor committed Feb 11, 2022
1 parent c444503 commit 0642129
Showing 1 changed file with 22 additions and 22 deletions.
44 changes: 22 additions & 22 deletions fiftyone/utils/aws.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""
Utilities for working with
`Amazon Web Services <https://aws.amazon.com/>`
S3 storage buckets.
Utilities for working with `Amazon Web Services <https://aws.amazon.com>`_.
| Copyright 2017-2022, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
Expand All @@ -16,51 +14,52 @@
import boto3
import botocore

import fiftyone.core.utils as fou

import eta.core.utils as etau

import fiftyone.core.utils as fou


logger = logging.getLogger(__name__)


def download_public_s3_files(
urls, download_dir=None, num_workers=None, overwrite=True,
urls, download_dir=None, num_workers=None, overwrite=True
):
"""Download files from a public AWS S3 bucket using unsigned urls.
"""Download files from a public AWS S3 bucket using unsigned URLs.
The `urls` argument accepts either:
* A list of paths to objects in the s3 bucket::
- A list of paths to objects in the s3 bucket::
urls = ["s3://bucket_name/dir1/file1.ext", ...]
urls = ["s3://bucket_name/dir1/file1.ext", ...]
When `urls` is a list, then the `download_dir` argument is required
and all objects will be downloaded into that directory
When `urls` is a list, then the `download_dir` argument is required
and all objects will be downloaded into that directory
* A dictionary mapping the paths of objects to files on disk
to store each object::
- A dictionary mapping the paths of objects to files on disk to store
each object::
urls = {
"s3://bucket_name/dir1/file1.ext": "/path/to/local/file1.ext",
...
}
urls = {
"s3://bucket_name/dir1/file1.ext": "/path/to/local/file1.ext",
...
}
Args:
urls: either a list of urls to objects in an s3 bucket, or a dict
mapping these urls to locations on disk. If `urls` is a list, then
urls: either a list of URLs to objects in an s3 bucket, or a dict
mapping these URLs to locations on disk. If `urls` is a list, then
the `download_dir` argument is required
download_dir (None): the directory to store all downloaded objects.
This is only used if `urls` is a list
num_workers (None): the number of processes to use when downloading
files. By default, ``multiprocessing.cpu_count()`` is used
overwrite (True): whether to overwrite existing files
"""
if isinstance(urls, list):
if not isinstance(urls, dict):
if download_dir is None:
raise ValueError(
"When `urls` is a list, `download_dir` is required but was found to be `None`."
"`download_dir` is required when `urls` is a list"
)

urls = {url: None for url in urls}

if download_dir:
Expand Down Expand Up @@ -96,14 +95,15 @@ def _build_inputs(urls, s3_client, download_dir=None, overwrite=True):
bucket_name, object_path = _parse_url(url)
if filepath is None:
filepath = os.path.join(download_dir, object_path)

if not os.path.isfile(filepath):
inputs.append((bucket_name, object_path, filepath, s3_client))
else:
if overwrite:
os.remove(filepath)
else:
logger.warning(
"File `%s` already exists, skipping..." % filepath
"File '%s' already exists, skipping...", filepath
)

return inputs
Expand Down

0 comments on commit 0642129

Please sign in to comment.