Skip to content

Commit

Permalink
Add celebrity photos download
Browse files Browse the repository at this point in the history
Signed-off-by: Ein Verne <einverne@gmail.com>
  • Loading branch information
einverne committed Jun 25, 2017
1 parent 0287168 commit 67ef9d7
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 2 deletions.
14 changes: 12 additions & 2 deletions bin/douban-album-dl
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,37 @@ import requests
import sys
import os

# Browser-like request headers sent with every photo download.
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    # Only advertise encodings that requests can always decode. Brotli
    # ("br") is decoded only when an optional brotli package is installed;
    # without it the compressed bytes would be written to disk unchanged.
    "accept-encoding": "gzip, deflate",
    "accept-language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36",
    "cache-control": "max-age=0",
}


def mayday():
    """Print the one-line command usage to standard output."""
    usage = "douban-album-dl album_id [location=./album]"
    print(usage)


def mkdir(path):
    """Create the directory *path*, including any missing parents.

    Does nothing when the directory already exists. ``exist_ok=True``
    replaces the separate existence check, so there is no window in which
    another process can create the directory between check and creation.
    """
    os.makedirs(path, exist_ok=True)


def get_album(album, path):
    """Download every photo of *album* into the directory *path*.

    The directory is created when missing and becomes the current working
    directory of the process. Each photo is stored under the base name of
    its URL. A photo whose request does not return a successful HTTP status
    is reported and skipped, so no error page is written as an image.

    Args:
        album: object whose ``photos()`` method yields photo URLs.
        path: destination directory.
    """
    mkdir(path)
    os.chdir(path)
    saved = 0
    for idx, photo_url in enumerate(album.photos()):
        name = os.path.basename(photo_url)
        print("{}: saving {}".format(idx, name))
        # A single request per photo, sent with the browser-like headers.
        # The ``with`` block releases the connection even on failure.
        with requests.get(photo_url, headers=headers, stream=True) as r:
            if not r.ok:
                print("{}: skipped, HTTP status {}".format(name, r.status_code))
                continue
            with open(name, "wb") as f:
                # Stream to disk in chunks rather than buffering the whole
                # body in memory, which is what stream=True is meant for.
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        saved += 1
    print()
    print("saving album to {}, total {} images".format(path, saved))


Expand Down
36 changes: 36 additions & 0 deletions bin/douban-celebrity-dl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os

import requests

from douban.celebrity import Celebrity


def get_celebrity(celebrity, path):
    """Download all photos of *celebrity* into the directory *path*.

    The directory (including missing parents) is created when needed and
    becomes the current working directory of the process. Photos already
    present under their final name are skipped, so the download can be
    resumed by running it again.

    Args:
        celebrity: object whose ``photos()`` method yields
            ``(referer_url, photo_url)`` pairs.
        path: destination directory.
    """
    # makedirs instead of mkdir: the destination may be several levels deep
    # and its parents are not guaranteed to exist.
    os.makedirs(path, exist_ok=True)
    os.chdir(path)
    idx = 0
    for refer, photo_url in celebrity.photos():
        name = os.path.basename(photo_url)
        if os.path.exists(name):
            print("pic {} exist skip".format(name))
            continue
        print("{}: saving {}".format(idx, name))
        request_headers = {
            "Referer": refer,
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36"
        }
        # Download to a temporary name first: an interrupted transfer must
        # not leave a truncated file that the exists-check above would
        # treat as complete on the next run.
        partial = name + ".part"
        with requests.get(photo_url, headers=request_headers, stream=True) as r:
            if not r.ok:
                print("{}: skipped, HTTP status {}".format(name, r.status_code))
                continue
            with open(partial, "wb") as f:
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        os.replace(partial, name)
        idx += 1
    print("saving celebrity photo to {}".format(path))


if __name__ == "__main__":
    import sys

    # The celebrity id is taken from the first command-line argument; with
    # no argument the previously hard-coded id is used, so a bare
    # invocation behaves as before.
    celebrity_id = sys.argv[1] if len(sys.argv) > 1 else "1335340"
    celebrity = Celebrity(celebrity_id)
    # Photos go to ~/Pictures/celebrity/<celebrity_id>.
    path = os.path.join(os.path.expanduser("~"), "Pictures", "celebrity", celebrity_id)
    get_celebrity(celebrity, path)
37 changes: 37 additions & 0 deletions douban/celebrity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup


class Celebrity:
    """Iterate over the photos listed on a Douban celebrity's photo pages."""

    BASE_URL = "https://movie.douban.com/celebrity/{}/photos/"

    def __init__(self, celebrity_id):
        """Build the photo listing URL for *celebrity_id*."""
        self.url = Celebrity.BASE_URL.format(celebrity_id)

    def photos(self):
        """Yield ``(photo_page_url, raw_image_url)`` for each listed photo.

        Listing pages are requested one after another, advancing the
        offset by the number of entries the previous page returned, until
        a page comes back with no entries.
        """
        offset = 0
        covers = self.__photos(offset)
        while covers:
            for cover in covers:
                # The listing shows thumbnails; the same file exists under
                # other size directories, e.g.
                # https://img3.doubanio.com/view/photo/thumb/public/p2156276775.jpg
                # https://img3.doubanio.com/view/photo/raw/public/p2156276775.jpg
                # https://img3.doubanio.com/view/photo/photo/public/p2156276775.webp
                page_link = cover.a['href']
                raw_src = cover.img['src'].replace("photo/thumb", "photo/raw")
                yield page_link, raw_src
            offset += len(covers)
            covers = self.__photos(offset)

    def __photos(self, start):
        """Fetch one listing page at offset *start* and return its
        ``<div class="cover">`` elements."""
        query = {
            "type": "C",
            "start": start,
            "sortby": "vote",
            "size": "a",
            "subtype": "a",
        }
        response = requests.get(self.url, params=query)
        document = BeautifulSoup(response.text, "html.parser")
        return document.find_all("div", class_="cover")

0 comments on commit 67ef9d7

Please sign in to comment.