diff --git a/newshomepages/extract/__init__.py b/newshomepages/extract/__init__.py
new file mode 100644
index 00000000000..35165ad62ae
--- /dev/null
+++ b/newshomepages/extract/__init__.py
@@ -0,0 +1,3 @@
+from .cli import cli_group as cli
+
+__all__ = ("cli",)
diff --git a/newshomepages/extract/__main__.py b/newshomepages/extract/__main__.py
new file mode 100644
index 00000000000..76eebfa5dd6
--- /dev/null
+++ b/newshomepages/extract/__main__.py
@@ -0,0 +1,4 @@
+from .cli import cli_group
+
+if __name__ == "__main__":
+    cli_group()
diff --git a/newshomepages/extract.py b/newshomepages/extract/cli.py
similarity index 85%
rename from newshomepages/extract.py
rename to newshomepages/extract/cli.py
index e132064ddbb..32ff5222bb7 100644
--- a/newshomepages/extract.py
+++ b/newshomepages/extract/cli.py
@@ -2,22 +2,20 @@
 import json
 import os
 import pathlib
-import re
 import time
 import typing
 from datetime import datetime
 from urllib.parse import urlparse
 
 import click
-import internetarchive
 import pandas as pd
 import requests
 from requests.adapters import HTTPAdapter, Retry
-from retry import retry
 from rich import print
 from rich.progress import track
 
-from . import utils
+from .. import utils
+from .items import cli as download_items
 
 IA_ACCESS_KEY = os.getenv("IA_ACCESS_KEY")
 IA_SECRET_KEY = os.getenv("IA_SECRET_KEY")
@@ -32,70 +30,6 @@ def cli():
     pass
 
 
-@cli.command()
-@click.option("-y", "--year", "year", default=CURRENT_YEAR)
-@click.option("--site", "site", default=None)
-@click.option("--country", "country", default=None)
-@click.option("--language", "language", default=None)
-@click.option("--bundle", "bundle", default=None)
-@click.option("--batch", "batch", default=None)
-@click.option("-o", "--output-path", "output_path", default=utils.EXTRACT_DIR / "json")
-@click.option("--wait", "wait", default=5, help="How long to pause between requests")
-def download_items(
-    year: str,
-    site: typing.Optional[str] = None,
-    country: typing.Optional[str] = None,
-    language: typing.Optional[str] = None,
-    bundle: typing.Optional[str] = None,
-    batch: typing.Optional[str] = None,
-    output_path=utils.EXTRACT_DIR / "json",
-    wait: float = 5,
-):
-    """Download the full list of Internet Archive items as JSON."""
-    assert IA_COLLECTION
-
-    @retry(tries=3, delay=30, backoff=2)
-    def _save_item(item):
-        # Save it locally
-        output_obj = pathlib.Path(output_path)
-        output_obj.mkdir(parents=True, exist_ok=True)
-        with open(output_obj / f"{item.identifier}.json", "w") as fh:
-            json.dump(item.item_metadata, fh, indent=2)
-        time.sleep(wait)
-
-    @retry(tries=3, delay=30, backoff=2)
-    def _site_search(s):
-        s = s["handle"].lower()
-        # Replace any leading underscores, which don't work on archive.org
-        s = re.sub("^(_+)", "", s)
-        search = f"collection:{IA_COLLECTION} AND identifier:({s}-{year})"
-        return internetarchive.search_items(search).iter_as_items()
-
-    # If the user has provided a way to filter to a subset of sites, pull em out
-    if site:
-        site_list = [utils.get_site(site)]
-    elif country:
-        site_list = utils.get_sites_in_country(country)
-    elif language:
-        site_list = utils.get_sites_in_language(language)
-    elif bundle:
-        site_list = utils.get_sites_in_bundle(bundle)
-    elif batch:
-        site_list = utils.get_sites_in_batch(int(batch))
-    else:
-        site_list = None
-
-    # If we're filtering go get those
-    if site_list:
-        for obj in track(site_list):
-            [_save_item(i) for i in _site_search(obj)]
-    # Otherwise, go get all items in the collection from this year
-    else:
-        search = f"collection:{IA_COLLECTION} AND identifier:(*-{year})"
-        item_list = internetarchive.search_items(search).iter_as_items()
-        [_save_item(i) for i in item_list]
-
-
 @cli.command()
 @click.argument("handle")
 def download_accessibility(handle):
@@ -547,5 +481,7 @@ def _get_json_url(url):
     return df
 
 
+cli_group = click.CommandCollection(sources=[cli, download_items])
+
 if __name__ == "__main__":
-    cli()
+    cli_group()
diff --git a/newshomepages/extract/items.py b/newshomepages/extract/items.py
new file mode 100644
index 00000000000..2ba5a1fb631
--- /dev/null
+++ b/newshomepages/extract/items.py
@@ -0,0 +1,107 @@
+import os
+import time
+import typing
+from datetime import datetime
+from pathlib import Path
+
+import click
+import internetarchive
+from retry import retry
+from rich import print
+
+from .. import utils
+
+IA_COLLECTION = os.getenv("IA_COLLECTION")
+
+
+@click.group()
+def cli():
+    """Download items from our archive.org collection as JSON."""
+    pass
+
+
+@cli.command()
+@click.option("-y", "--year", "year", default=None)
+@click.option("--site", "site", default=None)
+@click.option("--country", "country", default=None)
+@click.option("--language", "language", default=None)
+@click.option("--bundle", "bundle", default=None)
+@click.option("--batch", "batch", default=None)
+@click.option("-o", "--output-path", "output_path", default="./")
+@click.option("--wait", "wait", default="0", help="How long to pause between requests")
+def items(
+    year: typing.Optional[typing.Any] = None,
+    site: typing.Optional[str] = None,
+    country: typing.Optional[str] = None,
+    language: typing.Optional[str] = None,
+    bundle: typing.Optional[str] = None,
+    batch: typing.Optional[str] = None,
+    output_path: str = "./",
+    wait: typing.Any = "0",
+):
+    """Download items from our archive.org collection as JSON."""
+    # Set some variables for later
+    assert IA_COLLECTION
+    wait = float(wait)
+    if year:
+        year = int(year)
+    else:
+        year = datetime.now().year
+
+    @retry(tries=3, delay=30, backoff=2)
+    def _save_item(item):
+        """Save an item as JSON to disk."""
+        utils.write_json(
+            item.item_metadata, Path(output_path) / f"{item.identifier}.json"
+        )
+        if wait:
+            print(f"Waiting {wait} seconds")
+            time.sleep(wait)
+
+    @retry(tries=3, delay=30, backoff=2)
+    def _site_search(s):
+        """Search archive.org for items to download."""
+        s = utils.safe_ia_handle(s["handle"])
+        search = f"collection:{IA_COLLECTION} AND identifier:({s}-{year})"
+        return internetarchive.search_items(search).iter_as_items()
+
+    # If the user has provided a way to filter to a subset of sites, pull em out
+    if site:
+        site_list = [utils.get_site(site)]
+    elif country:
+        print(
+            f"Downloading items for country `{country}` from archive.org collection `{IA_COLLECTION}`"
+        )
+        site_list = utils.get_sites_in_country(country)
+    elif language:
+        print(
+            f"Downloading items for language `{language}` from archive.org collection `{IA_COLLECTION}`"
+        )
+        site_list = utils.get_sites_in_language(language)
+    elif bundle:
+        print(
+            f"Downloading items for bundle `{bundle}` from archive.org collection `{IA_COLLECTION}`"
+        )
+        site_list = utils.get_sites_in_bundle(bundle)
+    elif batch:
+        print(
+            f"Downloading items for batch `{batch}` from archive.org collection `{IA_COLLECTION}`"
+        )
+        site_list = utils.get_sites_in_batch(int(batch))
+    else:
+        print(f"Downloading all items from archive.org collection `{IA_COLLECTION}`")
+        site_list = None
+
+    # If we're filtering go get those
+    if site_list:
+        for obj in site_list:
+            print(
+                f"Downloading items for site `{obj['handle']}` from archive.org collection `{IA_COLLECTION}`"
+            )
+            [_save_item(i) for i in _site_search(obj)]
+
+    # Otherwise, go get all items in the collection from this year
+    else:
+        search = f"collection:{IA_COLLECTION} AND identifier:(*-{year})"
+        item_list = internetarchive.search_items(search).iter_as_items()
+        [_save_item(i) for i in item_list]
diff --git a/newshomepages/utils.py b/newshomepages/utils.py
index 017528b3d51..9c08e7de3e5 100644
--- a/newshomepages/utils.py
+++ b/newshomepages/utils.py
@@ -1,4 +1,5 @@
 import csv
+import json
 import re
 import tempfile
 import time
@@ -41,6 +42,14 @@ def safe_ia_handle(s):
     return s
 
 
+def write_json(data: typing.Any, path: Path, indent: int = 2):
+    """Write JSON data to the provided path with the given indent."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    print(f"📥 Writing JSON to {path}")
+    with open(path, "w") as fh:
+        json.dump(data, fh, indent=indent)
+
+
 def parse_archive_url(url: str):
     """Parse the handle and timestamp from an archive.org URL."""
     o = urlparse(url)
diff --git a/tests/test_extract.py b/tests/test_extract.py
new file mode 100644
index 00000000000..bdad76302e8
--- /dev/null
+++ b/tests/test_extract.py
@@ -0,0 +1,10 @@
+from click.testing import CliRunner
+
+from newshomepages.extract import cli
+
+
+def test_item(tmp_path):
+    """Test a site's item download."""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["items", "--site=latimes", f"-o={tmp_path}"])
+    assert result.exit_code == 0