diff --git a/bazarr/app/config.py b/bazarr/app/config.py
index a5b1ff7e2..585b73729 100644
--- a/bazarr/app/config.py
+++ b/bazarr/app/config.py
@@ -239,6 +239,10 @@ def check_parser_binary(value):
     Validator('opensubtitlescom.use_hash', must_exist=True, default=True, is_type_of=bool),
     Validator('opensubtitlescom.include_ai_translated', must_exist=True, default=False, is_type_of=bool),
 
+    # napiprojekt section
+    Validator('napiprojekt.only_authors', must_exist=True, default=False, is_type_of=bool),
+    Validator('napiprojekt.only_real_names', must_exist=True, default=False, is_type_of=bool),
+
     # addic7ed section
     Validator('addic7ed.username', must_exist=True, default='', is_type_of=str, cast=str),
     Validator('addic7ed.password', must_exist=True, default='', is_type_of=str, cast=str),
diff --git a/bazarr/app/get_providers.py b/bazarr/app/get_providers.py
index a649db423..f45118326 100644
--- a/bazarr/app/get_providers.py
+++ b/bazarr/app/get_providers.py
@@ -30,7 +30,6 @@ from sonarr.blacklist import blacklist_log
 from utilities.analytics import event_tracker
 
-
 _TRACEBACK_RE = re.compile(r'File "(.*?providers[\\/].*?)", line (\d+)')
 
 
@@ -41,7 +40,7 @@ def time_until_midnight(timezone):
     """
     now_in_tz = datetime.datetime.now(tz=timezone)
     midnight = now_in_tz.replace(hour=0, minute=0, second=0, microsecond=0) + \
-        datetime.timedelta(days=1)
+               datetime.timedelta(days=1)
     return midnight - now_in_tz
 
 
@@ -254,6 +253,8 @@ def get_providers_auth():
                              'include_ai_translated': settings.opensubtitlescom.include_ai_translated,
                              'api_key': 's38zmzVlW7IlYruWi7mHwDYl2SfMQoC1'
                              },
+        'napiprojekt': {'only_authors': settings.napiprojekt.only_authors,
+                        'only_real_names': settings.napiprojekt.only_real_names},
         'podnapisi': {
             'only_foreign': False,  # fixme
             'also_foreign': False,  # fixme
@@ -369,7 +370,7 @@ def provider_throttle(name, exception, ids=None, language=None):
             cls = valid_cls
 
     throttle_data = provider_throttle_map().get(name, provider_throttle_map()["default"]).get(cls, None) or \
-        provider_throttle_map()["default"].get(cls, None)
+                    provider_throttle_map()["default"].get(cls, None)
 
     if throttle_data:
         throttle_delta, throttle_description = throttle_data
@@ -379,7 +380,8 @@ def provider_throttle(name, exception, ids=None, language=None):
     throttle_until = datetime.datetime.now() + throttle_delta
 
     if cls_name not in VALID_COUNT_EXCEPTIONS or throttled_count(name):
-        if cls_name == 'ValueError' and isinstance(exception.args, tuple) and len(exception.args) and exception.args[0].startswith('unsupported pickle protocol'):
+        if cls_name == 'ValueError' and isinstance(exception.args, tuple) and len(exception.args) and exception.args[
+            0].startswith('unsupported pickle protocol'):
             for fn in subliminal_cache_region.backend.all_filenames:
                 try:
                     os.remove(fn)
diff --git a/custom_libs/subliminal/providers/napiprojekt.py b/custom_libs/subliminal/providers/napiprojekt.py
index 75aba3957..940083b71 100644
--- a/custom_libs/subliminal/providers/napiprojekt.py
+++ b/custom_libs/subliminal/providers/napiprojekt.py
@@ -67,8 +67,10 @@ class NapiProjektProvider(Provider):
     server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
     subtitle_class = NapiProjektSubtitle
 
-    def __init__(self):
+    def __init__(self, only_authors=None, only_real_names=None):
         self.session = None
+        self.only_authors = only_authors
+        self.only_real_names = only_real_names
 
     def initialize(self):
         self.session = Session()
@@ -78,6 +80,8 @@ def terminate(self):
         self.session.close()
 
     def query(self, language, hash):
+        if self.only_authors or self.only_real_names:
+            return None
         params = {
             'v': 'dreambox',
             'kolejka': 'false',
diff --git a/custom_libs/subliminal_patch/providers/napiprojekt.py b/custom_libs/subliminal_patch/providers/napiprojekt.py
index 7f9a95eb9..58dcc5571 100644
--- a/custom_libs/subliminal_patch/providers/napiprojekt.py
+++ b/custom_libs/subliminal_patch/providers/napiprojekt.py
@@ -1,6 +1,7 @@
 # coding=utf-8
 from __future__ import absolute_import
 import logging
+import re
 
 from subliminal.providers.napiprojekt import NapiProjektProvider as _NapiProjektProvider, \
     NapiProjektSubtitle as _NapiProjektSubtitle, get_subhash
@@ -40,6 +41,11 @@ class NapiProjektProvider(_NapiProjektProvider):
     video_types = (Episode, Movie)
     subtitle_class = NapiProjektSubtitle
 
+    def __init__(self, only_authors=None, only_real_names=None):
+        super().__init__()
+        self.only_authors = only_authors
+        self.only_real_names = only_real_names
+
     def query(self, language, hash):
         params = {
             'v': 'dreambox',
@@ -68,8 +74,9 @@ def query(self, language, hash):
     def list_subtitles(self, video, languages):
         def flatten(l):
             return [item for sublist in l for item in sublist]
+
         return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None] + \
-            flatten([self._scrape(video, l) for l in languages])
+               flatten([self._scrape(video, l) for l in languages])
 
     def download_subtitle(self, subtitle):
         if subtitle.content is not None:
@@ -80,7 +87,8 @@ def _scrape(self, video, language):
         if language.alpha2 != 'pl':
             return []
         title, matches = self._find_title(video)
-        if title == None:
+
+        if title is None:
             return []
         episode = f'-s{video.season:02d}e{video.episode:02d}' if isinstance(
             video, Episode) else ''
@@ -89,14 +97,53 @@ def _scrape(self, video, language):
         response.raise_for_status()
         soup = BeautifulSoup(response.content, 'html.parser')
         subtitles = []
-        for link in soup.find_all('a'):
-            if 'class' in link.attrs and 'tableA' in link.attrs['class']:
-                hash = link.attrs['href'][len('napiprojekt:'):]
-                subtitles.append(
-                    NapiProjektSubtitle(language,
-                                        hash,
-                                        release_info=str(link.contents[0]),
-                                        matches=matches | ({'season', 'episode'} if episode else set())))
+
+        # Find all rows with titles and napiprojekt links
+        rows = soup.find_all("tr", title=True)
+
+        for row in rows:
+            for link in row.find_all('a'):
+                if 'class' in link.attrs and 'tableA' in link.attrs['class']:
+                    row_title = row['title']
+                    hash = link.attrs['href'][len('napiprojekt:'):]
+
+                    data = row.find_all('p')
+
+                    size = data[1].contents[0] if len(data) > 1 and data[1].contents else ""
+                    length = data[3].contents[0] if len(data) > 3 and data[3].contents else ""
+                    author = data[4].contents[0] if len(data) > 4 and data[4].contents else ""
+                    added = data[5].contents[0] if len(data) > 5 and data[5].contents else ""
+
+                    if author == "":
+                        # Fall back to the "Autor: ...(" entry embedded in the row's title attribute
+                        match = re.search(r"Autor: (.*?)\(", row_title)
+                        if match:
+                            author = match.group(1).strip()
+
+                    if self.only_authors:
+                        # Skip rows with no author at all or with one of the generic / AI-style author labels
+                        if author.strip().lower() in ("", "brak", "automat", "si", "chatgpt", "ai", "robot"):
+                            continue
+
+                    if self.only_real_names:
+                        # Keep only authors containing at least two uppercase letters and one lowercase
+                        # letter, or consisting of exactly two words separated by one whitespace character
+                        if not (re.match(r'^(?=(?:.*[A-Z]){2})(?=.*[a-z]).*$', author) or
+                                re.match(r'^\w+\s\w+$', author)):
+                            continue
+
+                    match = re.search(r"Video rozdzielczość: (.*?)<", row_title)
+                    resolution = match.group(1).strip() if match else ""
+
+                    match = re.search(r"Video FPS: (.*?)<", row_title)
+                    fps = match.group(1).strip() if match else ""
+
+                    release_info = f"Autor: {author} | {resolution} | {fps} | {size} | {added} | {length}"
+                    subtitles.append(
+                        NapiProjektSubtitle(language,
+                                            hash,
+                                            release_info=release_info,
+                                            matches=matches | ({'season', 'episode'} if episode else set())))
 
         logger.debug(f'Found subtitles {subtitles}')
         return subtitles
 
@@ -114,15 +161,17 @@ def _find_title(self, video):
             video, Episode) else video.imdb_id
 
         def match_title_tag(
-            tag): return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs['class'] and 'href' in tag.attrs
+                tag):
+            return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs[
+                'class'] and 'href' in tag.attrs
 
         if imdb_id:
             for entry in soup.find_all(lambda tag: tag.name == 'div' and 'greyBoxCatcher' in tag['class']):
                 if entry.find_all(href=lambda href: href and href.startswith(f'https://www.imdb.com/title/{imdb_id}')):
                     for link in entry.find_all(match_title_tag):
                         return link.attrs['href'][len('napisy-'):], \
-                            {'series', 'year', 'series_imdb_id'} if isinstance(
-                                video, Episode) else {'title', 'year', 'imdb_id'}
+                               {'series', 'year', 'series_imdb_id'} if isinstance(
+                            video, Episode) else {'title', 'year', 'imdb_id'}
 
         type = 'episode' if isinstance(video, Episode) else 'movie'
         for link in soup.find_all(match_title_tag):
diff --git a/frontend/src/pages/Settings/Providers/list.ts b/frontend/src/pages/Settings/Providers/list.ts
index ee345f18a..e9d75b87f 100644
--- a/frontend/src/pages/Settings/Providers/list.ts
+++ b/frontend/src/pages/Settings/Providers/list.ts
@@ -320,7 +320,22 @@ export const ProviderList: Readonly = [
       },
     ],
   },
-  { key: "napiprojekt", description: "Polish Subtitles Provider" },
+  {
+    key: "napiprojekt",
+    description: "Polish Subtitles Provider",
+    inputs: [
+      {
+        type: "switch",
+        key: "only_authors",
+        name: "Skip subtitles without authors or possibly AI generated",
+      },
+      {
+        type: "switch",
+        key: "only_real_names",
+        name: "Download subtitles with real name authors only",
+      },
+    ],
+  },
   {
     key: "napisy24",
     description: "Polish Subtitles Provider",