Skip to content

Commit

Permalink
Merge pull request #21 from TheBoringDude/refactor
Browse files Browse the repository at this point in the history
Refactor
  • Loading branch information
tbdsux authored Sep 2, 2022
2 parents ee2556d + a101af6 commit de6c469
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 86 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]

python-version: ["3.9", "3.10"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
},
"python.analysis.extraPaths": [
"venv/lib"
]
],
"python.linting.mypyEnabled": true
}
81 changes: 65 additions & 16 deletions api/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ def _get_main_container(self) -> None:
self.info["title"] = container.find("h1", class_="film-title").find("a").text

# RATING (could be either N/A or with number)
self.info["rating"] = self._handle_rating(container.find("div", class_="col-film-rating").find("div"))
self.info["rating"] = self._handle_rating(
container.find("div", class_="col-film-rating").find("div")
)

# POSTER
self.info["poster"] = self._get_poster(container)
Expand Down Expand Up @@ -65,9 +67,10 @@ def _get_other_info(self) -> None:
_title = i.find("b").text.strip()
self.info["others"][
_title.replace(":", "").replace(" ", "_").lower()
] = i.text.replace(
_title + " ", ""
).strip() # remove leading and trailing white spaces
] = [
i.strip()
for i in i.text.replace(_title + " ", "").strip().split(", ")
]

except Exception:
# there was a problem while trying to parse
Expand Down Expand Up @@ -116,9 +119,7 @@ def _get_main_container(self) -> None:
)[1]

# get all headers
_work_headers = [
i.text.strip().lower() for i in _works_container.find_all("h5")
]
_work_headers = [i.text.strip() for i in _works_container.find_all("h5")]
_work_tables = _works_container.find_all("table")

for j, k in zip(_work_headers, _work_tables):
Expand All @@ -136,7 +137,9 @@ def _get_main_container(self) -> None:
"link": urljoin(MYDRAMALIST_WEBSITE, _raw_title["href"]),
"name": _raw_title.text,
},
"rating": self._handle_rating(i.find("td", class_="text-center").find(class_="text-sm"))
"rating": self._handle_rating(
i.find("td", class_="text-center").find(class_="text-sm")
),
}

_raw_role = i.find("td", class_="role")
Expand All @@ -154,7 +157,7 @@ def _get_main_container(self) -> None:
else:
r["role"] = {
"name": _raw_role_name,
"id": _raw_role.find("div", class_="roleid").text.strip(),
"type": _raw_role.find("div", class_="roleid").text.strip(),
}
except Exception:
pass
Expand Down Expand Up @@ -207,7 +210,9 @@ def _get_main_container(self) -> None:
__temp_cast_slug = __temp_cast["href"].strip()
__temp_cast_data = {
"name": __temp_cast.find("b").text.strip(),
"profile_image": self._get_poster(i),
"profile_image": self._get_poster(i).replace(
"s.jpg", "m.jpg"
), # replace the small image URL ("s.jpg") with the medium-size one ("m.jpg")
"slug": __temp_cast_slug,
"link": urljoin(MYDRAMALIST_WEBSITE, __temp_cast_slug),
}
Expand Down Expand Up @@ -249,14 +254,16 @@ def _get_main_container(self) -> None:
__temp_reviews = container.find_all("div", class_="review")

for i in __temp_reviews:
__temp_review = {}
__temp_review: Dict[str, Any] = {}

try:
# reviewer / person
__temp_review["reviewer"] = {
"name": i.find("a").text.strip(),
"user_link": urljoin(MYDRAMALIST_WEBSITE, i.find("a")["href"]),
"user_image": self._get_poster(i),
"user_image": self._get_poster(i).replace(
"1t", "1c"
), # replace "1t" with "1c" so that a bigger image is returned instead of the thumbnail
"info": i.find("div", class_="user-stats").text.strip(),
}

Expand All @@ -267,11 +274,53 @@ def _get_main_container(self) -> None:
"div", class_="rating-overall"
)

__temp_review["review"] = (
i.find("div", class_=re.compile("review-body"))
.text.replace(__temp_review_ratings.text.strip(), "")
.strip()
# start parsing the review section
__temp_review_contents = []

__temp_review_container = i.find(
"div", class_=re.compile("review-body")
)

__temp_review_spoiler = __temp_review_container.find(
"div", "review-spoiler"
)
if __temp_review_spoiler is not None:
__temp_review_contents.append(__temp_review_spoiler.text.strip())

__temp_review_strong = __temp_review_container.find("strong")
if __temp_review_strong is not None:
__temp_review_contents.append(__temp_review_strong.text.strip())

__temp_review_read_more = __temp_review_container.find(
"p", class_="read-more"
).text.strip()
__temp_review_vote = __temp_review_container.find(
"div", class_="review-helpful"
).text.strip()

for i in __temp_review_container.find_all("br"):
i.replace_with("\n")

__temp_review_content = (
__temp_review_container.text.replace(
__temp_review_ratings.text.strip(), ""
)
.replace(__temp_review_read_more, "")
.replace(__temp_review_vote, "")
)

if __temp_review_spoiler is not None:
__temp_review_content = __temp_review_content.replace(
__temp_review_spoiler.text.strip(), ""
)
if __temp_review_strong is not None:
__temp_review_content = __temp_review_content.replace(
__temp_review_strong.text.strip(), ""
)

__temp_review_contents.append(__temp_review_content.strip())
__temp_review["review"] = __temp_review_contents
# end parsing the review section

__temp_review["ratings"] = {
"overall": float(
Expand Down
7 changes: 5 additions & 2 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ async def fetch_cast(drama_id: str, response: Response) -> Dict[str, Any]:


@app.get("/id/{drama_id}/reviews")
async def fetch_reviews(drama_id: str, response: Response) -> Dict[str, Any]:
code, r = await fetch_func(query=f"{drama_id}/reviews", t="reviews")
async def fetch_reviews(
drama_id: str, response: Response, page: int = 1
) -> Dict[str, Any]:

code, r = await fetch_func(query=f"{drama_id}/reviews?page={page}", t="reviews")

response.status_code = code
return r
Expand Down
15 changes: 5 additions & 10 deletions api/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List, Any, Tuple, Type, TypeVar, Union
from typing import Dict, List, Any, Type, TypeVar, Union

from api import MYDRAMALIST_WEBSITE

Expand Down Expand Up @@ -32,7 +32,7 @@ def __init__(self, soup: BeautifulSoup, query: str, code: int, ok: bool) -> None
self.soup = soup
self.query = query
self.status_code = code
self.url = ""
self.ok = ok

@classmethod
async def scrape(cls: Type[T], query: str, t: str) -> T:
Expand All @@ -45,7 +45,7 @@ async def scrape(cls: Type[T], query: str, t: str) -> T:
url = ScrapeTypes[t] + query

ok = True
code = 0
code = 500 # default to 500, internal server error
soup = None

try:
Expand All @@ -58,18 +58,13 @@ async def scrape(cls: Type[T], query: str, t: str) -> T:

# set the status code
code = resp.status_code
ok = resp.status_code == 200

except Exception:
ok = False

return cls(soup, query, code, ok)

def check(self) -> Tuple[int, bool]:
"""Checks the status_code and returns it."""
if self.status_code == 200:
return 200, True

return self.status_code, False

# get page err, if possible
def res_get_err(self) -> Dict[str, Any]:
container = self.soup.find("div", class_="app-body")
Expand Down
14 changes: 6 additions & 8 deletions api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@ def error(code: int, description: str) -> Dict[str, Any]:
# search function
async def search_func(query: str) -> Tuple[int, Dict[str, Any]]:
f = await Search.scrape(query=query, t="search")
code, ok = f.check()
if not ok:
return code, error(code, "An unexpected error occurred.")
if not f.ok:
return f.status_code, error(f.status_code, "An unexpected error occurred.")
else:
f._get_search_results()

return code, f.search()
return f.status_code, f.search()


fs = {
Expand All @@ -40,10 +39,9 @@ async def fetch_func(query: str, t: str) -> Tuple[int, Dict[str, Any]]:
raise Exception("Invalid Error")

f = await fs[t].scrape(query=query, t="page")
code, ok = f.check()
if not ok:
return code, error(code, "An unexpected error occurred.")
if not f.ok:
return f.status_code, error(f.status_code, "An unexpected error occurred.")
else:
f._get()

return code, f.fetch()
return f.status_code, f.fetch()
3 changes: 3 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ disallow_incomplete_defs = True
check_untyped_defs = True
disallow_untyped_decorators = True
no_implicit_optional = True
show_error_codes = True
warn_redundant_casts = True
warn_unused_ignores = True
warn_return_any = True
# implicit_reexport = False
strict_equality = True
strict_optional = False
# --strict end


[mypy-tests.*]
ignore_missing_imports = True
check_untyped_defs = True
Expand Down
82 changes: 36 additions & 46 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,46 +1,36 @@
appdirs
asgiref
astroid
attrs
beautifulsoup4
black
bs4
certifi
chardet
click
cloudscraper
fastapi
flake8
h11
idna
importlib-metadata
iniconfig
isort
lazy-object-proxy
lxml
mccabe
mypy
mypy-extensions
packaging
pathspec
pluggy
py
pycodestyle
pydantic
pyflakes
pylint
pyparsing
pytest
regex
requests
requests-toolbelt
rope
soupsieve
starlette
toml
typed-ast==1.4.3
typing-extensions==3.10.0.0
urllib3
uvicorn
wrapt
zipp
anyio==3.6.1
attrs==22.1.0
beautifulsoup4==4.11.1
black==22.8.0
certifi==2022.6.15
charset-normalizer==2.1.1
click==8.1.3
cloudscraper==1.2.64
fastapi==0.81.0
flake8==5.0.4
h11==0.13.0
idna==3.3
iniconfig==1.1.1
lxml==4.9.1
mccabe==0.7.0
mypy==0.971
mypy-extensions==0.4.3
packaging==21.3
pathspec==0.10.0
platformdirs==2.5.2
pluggy==1.0.0
py==1.11.0
pycodestyle==2.9.1
pydantic==1.10.1
pyflakes==2.5.0
pyparsing==3.0.9
pytest==7.1.2
requests==2.28.1
requests-toolbelt==0.9.1
sniffio==1.2.0
soupsieve==2.3.2.post1
starlette==0.19.1
tomli==2.0.1
typing_extensions==4.3.0
urllib3==1.26.12
uvicorn==0.18.3
5 changes: 4 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_sample_drama() -> None:
dramas = [
{
"id": "65743-alien-girl-chai-xiao-qi-2",
"title": "My Girlfriend is an Alien 2",
"title": "My Girlfriend Is an Alien 2",
},
{"id": "58953-mouse", "title": "Mouse"},
]
Expand All @@ -24,6 +24,9 @@ def test_sample_drama() -> None:
r = client.get(f"/id/{i['id']}")

assert r.status_code == 200

print(r.json()["data"])

assert r.json()["data"]["title"] == i["title"]


Expand Down

1 comment on commit de6c469

@vercel
Copy link

@vercel vercel bot commented on de6c469 Sep 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

kuryana – ./

kuryana-theboringdude.vercel.app
kuryana.vercel.app
kuryana-git-master-theboringdude.vercel.app

Please sign in to comment.