Skip to content

Commit

Permalink
add option to remove finished run from db after export
Browse files Browse the repository at this point in the history
  • Loading branch information
puhoy committed Feb 20, 2022
1 parent b404a6e commit 9a9f61b
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .env.dist
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ HUBGREP_OLD_RUN_AGE=3600

# max retries per crawler block before we ignore it
HUBGREP_BLOCK_MAX_RETRIES=3

# keep the data of the last finished hoster crawl in the db - needed for manual csv export (set to 0 to drop the finished table after the export)
HUBGREP_KEEP_LAST_RUN_IN_DB=1
3 changes: 2 additions & 1 deletion hubgrep_indexer/cli_blueprint/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
@cli_bp.cli.command()
@click.argument("hosting_service")
def export_repos(hosting_service):
hosting_service_api_url = hosting_service
hosting_service: HostingService = HostingService.query.filter_by(
api_url=hosting_service
api_url=hosting_service_api_url
).first()

hosting_service.export_repos()
Expand Down
1 change: 1 addition & 0 deletions hubgrep_indexer/config/dotenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ class DotEnvConfig(Config):
LOGLEVEL = os.environ.get("HUBGREP_INDEXER_LOGLEVEL", "debug")

BLOCK_MAX_RETRIES = int(os.environ.get("HUBGREP_BLOCK_MAX_RETRIES", 3))
KEEP_LAST_RUN_IN_DB = bool(int(os.environ.get("HUBGREP_KEEP_LAST_RUN_IN_DB", 1)))
1 change: 1 addition & 0 deletions hubgrep_indexer/config/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ class TestingConfig(Config):
LOGIN_DISABLED = True

BLOCK_MAX_RETRIES = 3
KEEP_LAST_RUN_IN_DB = 1
9 changes: 9 additions & 0 deletions hubgrep_indexer/models/hosting_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from hubgrep_indexer import db
from hubgrep_indexer.models.export_meta import ExportMeta
from hubgrep_indexer.models.repositories.abstract_repository import Repository
from hubgrep_indexer.lib.table_helper import TableHelper

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -161,6 +162,14 @@ def handle_finished_run(self):
repo_class.rotate(self)
logger.debug(f"rotated repos for {self} - took {ts_rotate_start - time.time()}s")
self.export_repos()
logger.debug(f"export for {self} finished")
if not current_app.config['KEEP_LAST_RUN_IN_DB']:
logger.debug(f"dropping table for exported {self}")

target_table = Repository.get_finished_table_name(self)
with TableHelper._cursor() as cur:
TableHelper.drop_table(cur, target_table)


@property
def repos(self) -> ResultProxy:
Expand Down

0 comments on commit 9a9f61b

Please sign in to comment.