Skip to content

Commit

Permalink
Merge pull request #21 from TheBoringDude/refactor
Browse files Browse the repository at this point in the history
Refactor
  • Loading branch information
tbdsux authored Sep 2, 2022
2 parents ee2556d + a101af6 commit de6c469
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 86 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]

python-version: ["3.9", "3.10"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
},
"python.analysis.extraPaths": [
"venv/lib"
]
],
"python.linting.mypyEnabled": true
}
81 changes: 65 additions & 16 deletions api/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ def _get_main_container(self) -> None:
self.info["title"] = container.find("h1", class_="film-title").find("a").text

# RATING (could be either N/A or with number)
self.info["rating"] = self._handle_rating(container.find("div", class_="col-film-rating").find("div"))
self.info["rating"] = self._handle_rating(
container.find("div", class_="col-film-rating").find("div")
)

# POSTER
self.info["poster"] = self._get_poster(container)
Expand Down Expand Up @@ -65,9 +67,10 @@ def _get_other_info(self) -> None:
_title = i.find("b").text.strip()
self.info["others"][
_title.replace(":", "").replace(" ", "_").lower()
] = i.text.replace(
_title + " ", ""
).strip() # remove leading and trailing white spaces
] = [
i.strip()
for i in i.text.replace(_title + " ", "").strip().split(", ")
]

except Exception:
# there was a problem while trying to parse
Expand Down Expand Up @@ -116,9 +119,7 @@ def _get_main_container(self) -> None:
)[1]

# get all headers
_work_headers = [
i.text.strip().lower() for i in _works_container.find_all("h5")
]
_work_headers = [i.text.strip() for i in _works_container.find_all("h5")]
_work_tables = _works_container.find_all("table")

for j, k in zip(_work_headers, _work_tables):
Expand All @@ -136,7 +137,9 @@ def _get_main_container(self) -> None:
"link": urljoin(MYDRAMALIST_WEBSITE, _raw_title["href"]),
"name": _raw_title.text,
},
"rating": self._handle_rating(i.find("td", class_="text-center").find(class_="text-sm"))
"rating": self._handle_rating(
i.find("td", class_="text-center").find(class_="text-sm")
),
}

_raw_role = i.find("td", class_="role")
Expand All @@ -154,7 +157,7 @@ def _get_main_container(self) -> None:
else:
r["role"] = {
"name": _raw_role_name,
"id": _raw_role.find("div", class_="roleid").text.strip(),
"type": _raw_role.find("div", class_="roleid").text.strip(),
}
except Exception:
pass
Expand Down Expand Up @@ -207,7 +210,9 @@ def _get_main_container(self) -> None:
__temp_cast_slug = __temp_cast["href"].strip()
__temp_cast_data = {
"name": __temp_cast.find("b").text.strip(),
"profile_image": self._get_poster(i),
"profile_image": self._get_poster(i).replace(
"s.jpg", "m.jpg"
), # replace the small image URL ("s.jpg") with the medium-size one ("m.jpg")
"slug": __temp_cast_slug,
"link": urljoin(MYDRAMALIST_WEBSITE, __temp_cast_slug),
}
Expand Down Expand Up @@ -249,14 +254,16 @@ def _get_main_container(self) -> None:
__temp_reviews = container.find_all("div", class_="review")

for i in __temp_reviews:
__temp_review = {}
__temp_review: Dict[str, Any] = {}

try:
# reviewer / person
__temp_review["reviewer"] = {
"name": i.find("a").text.strip(),
"user_link": urljoin(MYDRAMALIST_WEBSITE, i.find("a")["href"]),
"user_image": self._get_poster(i),
"user_image": self._get_poster(i).replace(
"1t", "1c"
), # replace "1t" with "1c" so that a bigger image is returned instead of the thumbnail
"info": i.find("div", class_="user-stats").text.strip(),
}

Expand All @@ -267,11 +274,53 @@ def _get_main_container(self) -> None:
"div", class_="rating-overall"
)

__temp_review["review"] = (
i.find("div", class_=re.compile("review-body"))
.text.replace(__temp_review_ratings.text.strip(), "")
.strip()
# start parsing the review section
__temp_review_contents = []

__temp_review_container = i.find(
"div", class_=re.compile("review-body")
)

__temp_review_spoiler = __temp_review_container.find(
"div", "review-spoiler"
)
if __temp_review_spoiler is not None:
__temp_review_contents.append(__temp_review_spoiler.text.strip())

__temp_review_strong = __temp_review_container.find("strong")
if __temp_review_strong is not None:
__temp_review_contents.append(__temp_review_strong.text.strip())

__temp_review_read_more = __temp_review_container.find(
"p", class_="read-more"
).text.strip()
__temp_review_vote = __temp_review_container.find(
"div", class_="review-helpful"
).text.strip()

for i in __temp_review_container.find_all("br"):
i.replace_with("\n")

__temp_review_content = (
__temp_review_container.text.replace(
__temp_review_ratings.text.strip(), ""
)
.replace(__temp_review_read_more, "")
.replace(__temp_review_vote, "")
)

if __temp_review_spoiler is not None:
__temp_review_content = __temp_review_content.replace(
__temp_review_spoiler.text.strip(), ""
)
if __temp_review_strong is not None:
__temp_review_content = __temp_review_content.replace(
__temp_review_strong.text.strip(), ""
)

__temp_review_contents.append(__temp_review_content.strip())
__temp_review["review"] = __temp_review_contents
# end parsing the review section

__temp_review["ratings"] = {
"overall": float(
Expand Down
7 changes: 5 additions & 2 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ async def fetch_cast(drama_id: str, response: Response) -> Dict[str, Any]:


@app.get("/id/{drama_id}/reviews")
async def fetch_reviews(drama_id: str, response: Response) -> Dict[str, Any]:
code, r = await fetch_func(query=f"{drama_id}/reviews", t="reviews")
async def fetch_reviews(
drama_id: str, response: Response, page: int = 1
) -> Dict[str, Any]:

code, r = await fetch_func(query=f"{drama_id}/reviews?page={page}", t="reviews")

response.status_code = code
return r
Expand Down
15 changes: 5 additions & 10 deletions api/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List, Any, Tuple, Type, TypeVar, Union
from typing import Dict, List, Any, Type, TypeVar, Union

from api import MYDRAMALIST_WEBSITE

Expand Down Expand Up @@ -32,7 +32,7 @@ def __init__(self, soup: BeautifulSoup, query: str, code: int, ok: bool) -> None
self.soup = soup
self.query = query
self.status_code = code
self.url = ""
self.ok = ok

@classmethod
async def scrape(cls: Type[T], query: str, t: str) -> T:
Expand All @@ -45,7 +45,7 @@ async def scrape(cls: Type[T], query: str, t: str) -> T:
url = ScrapeTypes[t] + query

ok = True
code = 0
code = 500 # default to 500, internal server error
soup = None

try:
Expand All @@ -58,18 +58,13 @@ async def scrape(cls: Type[T], query: str, t: str) -> T:

# set the status code
code = resp.status_code
ok = resp.status_code == 200

except Exception:
ok = False

return cls(soup, query, code, ok)

def check(self) -> Tuple[int, bool]:
"""Checks the status_code and returns it."""
if self.status_code == 200:
return 200, True

return self.status_code, False

# get page err, if possible
def res_get_err(self) -> Dict[str, Any]:
container = self.soup.find("div", class_="app-body")
Expand Down
14 changes: 6 additions & 8 deletions api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@ def error(code: int, description: str) -> Dict[str, Any]:
# search function
async def search_func(query: str) -> Tuple[int, Dict[str, Any]]:
f = await Search.scrape(query=query, t="search")
code, ok = f.check()
if not ok:
return code, error(code, "An unexpected error occurred.")
if not f.ok:
return f.status_code, error(f.status_code, "An unexpected error occurred.")
else:
f._get_search_results()

return code, f.search()
return f.status_code, f.search()


fs = {
Expand All @@ -40,10 +39,9 @@ async def fetch_func(query: str, t: str) -> Tuple[int, Dict[str, Any]]:
raise Exception("Invalid Error")

f = await fs[t].scrape(query=query, t="page")
code, ok = f.check()
if not ok:
return code, error(code, "An unexpected error occurred.")
if not f.ok:
return f.status_code, error(f.status_code, "An unexpected error occurred.")
else:
f._get()

return code, f.fetch()
return f.status_code, f.fetch()
3 changes: 3 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ disallow_incomplete_defs = True
check_untyped_defs = True
disallow_untyped_decorators = True
no_implicit_optional = True
show_error_codes = True
warn_redundant_casts = True
warn_unused_ignores = True
warn_return_any = True
# implicit_reexport = False
strict_equality = True
strict_optional = False
# --strict end


[mypy-tests.*]
ignore_missing_imports = True
check_untyped_defs = True
Expand Down
82 changes: 36 additions & 46 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,46 +1,36 @@
appdirs
asgiref
astroid
attrs
beautifulsoup4
black
bs4
certifi
chardet
click
cloudscraper
fastapi
flake8
h11
idna
importlib-metadata
iniconfig
isort
lazy-object-proxy
lxml
mccabe
mypy
mypy-extensions
packaging
pathspec
pluggy
py
pycodestyle
pydantic
pyflakes
pylint
pyparsing
pytest
regex
requests
requests-toolbelt
rope
soupsieve
starlette
toml
typed-ast==1.4.3
typing-extensions==3.10.0.0
urllib3
uvicorn
wrapt
zipp
anyio==3.6.1
attrs==22.1.0
beautifulsoup4==4.11.1
black==22.8.0
certifi==2022.6.15
charset-normalizer==2.1.1
click==8.1.3
cloudscraper==1.2.64
fastapi==0.81.0
flake8==5.0.4
h11==0.13.0
idna==3.3
iniconfig==1.1.1
lxml==4.9.1
mccabe==0.7.0
mypy==0.971
mypy-extensions==0.4.3
packaging==21.3
pathspec==0.10.0
platformdirs==2.5.2
pluggy==1.0.0
py==1.11.0
pycodestyle==2.9.1
pydantic==1.10.1
pyflakes==2.5.0
pyparsing==3.0.9
pytest==7.1.2
requests==2.28.1
requests-toolbelt==0.9.1
sniffio==1.2.0
soupsieve==2.3.2.post1
starlette==0.19.1
tomli==2.0.1
typing_extensions==4.3.0
urllib3==1.26.12
uvicorn==0.18.3
5 changes: 4 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_sample_drama() -> None:
dramas = [
{
"id": "65743-alien-girl-chai-xiao-qi-2",
"title": "My Girlfriend is an Alien 2",
"title": "My Girlfriend Is an Alien 2",
},
{"id": "58953-mouse", "title": "Mouse"},
]
Expand All @@ -24,6 +24,9 @@ def test_sample_drama() -> None:
r = client.get(f"/id/{i['id']}")

assert r.status_code == 200

print(r.json()["data"])

assert r.json()["data"]["title"] == i["title"]


Expand Down

1 comment on commit de6c469

@vercel
Copy link

@vercel vercel bot commented on de6c469 Sep 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

kuryana – ./

kuryana-theboringdude.vercel.app
kuryana.vercel.app
kuryana-git-master-theboringdude.vercel.app

Please sign in to comment.