Skip to content

Commit

Permalink
Adding tarfile member sanitization to extractall()
Browse files Browse the repository at this point in the history
  • Loading branch information
TrellixVulnTeam committed Nov 23, 2022
1 parent f0dbc12 commit 6a41db4
Showing 1 changed file with 40 additions and 2 deletions.
42 changes: 40 additions & 2 deletions prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,26 @@ def download_extract_cub(cub_dir, cub_url):
download_url(cub_url, root=path.dirname(cub_dir))
filename = path.join(path.dirname(cub_dir), path.basename(cub_url))
with tarfile.open(filename, 'r:gz') as tar:
tar.extractall(path=path.dirname(cub_dir))
def is_within_directory(directory, target):
    """Return True if *target* is *directory* itself or is located beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (which collapses ``..`` segments) before they are compared. Symbolic
    links are not resolved.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    try:
        # commonpath() compares whole path components. commonprefix() works
        # character by character, so "/data/cub" would wrongly be reported
        # as containing "/data/cub_evil/x".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # commonpath() raises when the paths share no ancestor at all
        # (e.g. different drives on Windows): target is outside.
        return False

    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after checking that nothing escapes *path*.

    Every member returned by ``tar.getmembers()`` is validated (regardless
    of *members*): its own name, and for symbolic/hard links also the link
    target, must resolve to a location inside *path*.

    *members* and *numeric_owner* are passed through unchanged to
    ``tar.extractall``.

    Raises:
        Exception: if any member name or link target points outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # A link whose own name is harmless can still point outside the
        # destination and redirect later writes, so check its target too.
        if member.issym():
            # Symlink targets are relative to the directory of the link.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(tar, path=path.dirname(cub_dir))


@ex1.capture
Expand Down Expand Up @@ -77,7 +96,26 @@ def download_extract_cars(cars_dir, cars_url, cars_annotations_url):
download_url(cars_url, root=cars_dir)
filename = path.join(cars_dir, path.basename(cars_url))
with tarfile.open(filename, 'r:gz') as tar:
tar.extractall(path=cars_dir)
def is_within_directory(directory, target):
    """Return True if *target* is *directory* itself or is located beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (which collapses ``..`` segments) before they are compared. Symbolic
    links are not resolved.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    try:
        # commonpath() compares whole path components. commonprefix() works
        # character by character, so "/data/cars" would wrongly be reported
        # as containing "/data/cars_evil/x".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # commonpath() raises when the paths share no ancestor at all
        # (e.g. different drives on Windows): target is outside.
        return False

    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after checking that nothing escapes *path*.

    Every member returned by ``tar.getmembers()`` is validated (regardless
    of *members*): its own name, and for symbolic/hard links also the link
    target, must resolve to a location inside *path*.

    *members* and *numeric_owner* are passed through unchanged to
    ``tar.extractall``.

    Raises:
        Exception: if any member name or link target points outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # A link whose own name is harmless can still point outside the
        # destination and redirect later writes, so check its target too.
        if member.issym():
            # Symlink targets are relative to the directory of the link.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(tar, path=cars_dir)
return path.join(cars_dir, path.basename(cars_annotations_url))


Expand Down

0 comments on commit 6a41db4

Please sign in to comment.