Skip to content

Commit

Permalink
0.3.17 better cache saving
Browse files Browse the repository at this point in the history
  • Loading branch information
yaroslaff committed Apr 14, 2023
1 parent a9eb3c0 commit c4a178c
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 7 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@

# 0.3.16
- conditionally print number of new images

# 0.3.17
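- Better cache saving: the cache is written to a temporary file and then renamed into place, a clear error is printed if the cache JSON is damaged, and a new `--cache-save` option sets how many new images trigger a save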
12 changes: 9 additions & 3 deletions bin/nudecrawler
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ stats = {
'resume': dict(),
'gap_max': 0,
'gap_url': None,
'cache_path': None
'cache_path': None,
'cache_save': 1
}

previous_content_length = None
Expand Down Expand Up @@ -120,6 +121,8 @@ def get_args(argv=None):
def_errors = 5
def_minsize=10

def_cache_save = 1000

methods_list = ', '.join(filter_methods.keys())

parser.add_argument('words', nargs='*')
Expand All @@ -129,6 +132,8 @@ def get_args(argv=None):
parser.add_argument('--url1', metavar="URL", help='process only one url')
parser.add_argument('-f', '--fails', type=int, default=5, help='stop searching next pages with same words after N failures')
parser.add_argument('--day', nargs=2, type=int, metavar=('MONTH', 'DAY'), help='Current date (default is today) example: --day 12 31')
parser.add_argument('--cache-save', type=int, metavar='N', default=def_cache_save, help=f'Save cache after N new images ({def_cache_save})')


g = parser.add_argument_group('Page filtering options')
parser.add_argument('--expr', '-e', metavar='EXPR', default=def_expr,
Expand Down Expand Up @@ -215,7 +220,7 @@ def analyse(url):
save_stats(force=False)

if stats['cache_path']:
cache.save_conditional(stats['cache_path'])
cache.save_conditional(stats['cache_path'], stats['cache_save'])

if stop_after is not None and get_processed_images() > stop_after:
print("Stop/refresh after processed", get_processed_images(), "images...")
Expand Down Expand Up @@ -385,7 +390,8 @@ def main():
stats['filter']['max_errors'] = args.max_errors
stats['filter']['max_pictures'] = args.max_pictures
stats['cache_path'] = args.cache

stats['cache_save'] = args.cache_save

if args.bugreport:
nudecrawler.verbose.send_bugreports = True

Expand Down
15 changes: 12 additions & 3 deletions nudecrawler/cache.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json

import os
import sys
from .verbose import printv

class ImageCache(object):
Expand Down Expand Up @@ -44,16 +45,24 @@ def register(self, url, sum, verdict):

def load(self, path):
    """Load the cache from the JSON file at *path*.

    The file must hold a JSON object with the keys ``_url2sum`` and
    ``_sum2v``; both are stored on the instance under the same names.

    If the file is not valid JSON, a short explanation is printed and the
    process exits with status 1 instead of surfacing a traceback.

    Args:
        path: path of the cache file to read.

    Raises:
        SystemExit: when the file content cannot be parsed as JSON.
    """
    with open(path) as fh:
        # Parse exactly once: a second json.load() on the same handle would
        # see an exhausted stream and always fail.
        try:
            cache = json.load(fh)
        except json.decoder.JSONDecodeError:
            print('Invalid JSON in cache file', path)
            print('Fix or delete file and restart')
            sys.exit(1)

    self._url2sum = cache['_url2sum']
    self._sum2v = cache['_sum2v']
    print(f"Loaded {len(self._url2sum)} urls and {len(self._sum2v)} sums cache")

def save_conditional(self, path, new=1):
    """Save the cache to *path* once enough updates have accumulated.

    Nothing is written while ``self._new`` is below *new*. Otherwise the
    cache is saved to ``path + '.tmp'`` and that file is then moved over
    *path*, so *path* itself is never opened for a partial write.

    Args:
        path: destination cache file.
        new: minimum value of ``self._new`` that triggers a save.
    """
    if self._new < new:
        return

    printv(f"Save cache with {self._new} updates")
    tmppath = path + '.tmp'
    # NOTE(review): resetting self._new is presumably done inside save();
    # that method's body is not visible here -- confirm.
    self.save(tmppath)
    # os.replace overwrites an existing destination on every platform;
    # os.rename raises on Windows when *path* already exists.
    os.replace(tmppath, path)

def save(self, path):
data = dict(_url2sum = self._url2sum, _sum2v = self._sum2v)
Expand Down
2 changes: 1 addition & 1 deletion nudecrawler/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version="0.3.16"
version="0.3.17"

0 comments on commit c4a178c

Please sign in to comment.