Add celebrity photos download

Signed-off-by: Ein Verne <einverne@gmail.com>
einverne · Jun 25, 2017 · 67ef9d7 · 67ef9d7
1 parent 0287168
commit 67ef9d7
Show file tree

Hide file tree

Showing 3 changed files with 85 additions and 2 deletions.
diff --git a/bin/douban-album-dl b/bin/douban-album-dl
@@ -5,27 +5,37 @@ import requests
 import sys
 import os
 
+# Browser-like request headers; get_album passes them to requests.get for
+# every photo download.
+headers = {
+    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+    # "br" is deliberately not advertised: requests only decodes Brotli when
+    # an optional brotli package is installed, otherwise the saved bytes
+    # would still be compressed.
+    "accept-encoding": "gzip, deflate",
+    "accept-language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4",
+    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36",
+    "cache-control": "max-age=0"
+}
+
+
 def mayday():
+    """Print the command-line usage string."""
     h = """douban-album-dl album_id [location=./album]"""
 
     print(h)
 
+
 def mkdir(path):
+    """Create directory *path* (including missing parents) unless it already exists."""
     if not os.path.exists(path):
         os.makedirs(path)
 
+
 def get_album(album, path):
+    """Save every photo URL yielded by ``album.photos()`` into directory *path*.
+
+    Creates *path* if needed and changes the process working directory to
+    it. Each file is named after the basename of its URL. Prints one line
+    per photo and a final summary with the number of images saved.
+    """
     idx = 0
     mkdir(path)
     os.chdir(path)
     for photo_url in album.photos():
         name = os.path.basename(photo_url)
         print("{}: saving {}".format(idx, name))
-        r = requests.get(photo_url, stream=True)
+        # NOTE(review): stream=True gains nothing here because r.content
+        # below reads the whole body into memory anyway.
+        r = requests.get(photo_url, headers=headers, stream=True)
         with open(name, "wb") as f:
+            # NOTE(review): the body is written without checking the HTTP
+            # status, so an error response would be saved as the image.
             f.write(r.content)
         idx += 1
-    print()
     print("saving album to {}, total {} images".format(path, idx))
 
 

diff --git a/bin/douban-celebrity-dl.py b/bin/douban-celebrity-dl.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import os
+
+import requests
+
+from douban.celebrity import Celebrity
+
+
+def get_celebrity(celebrity, path):
+    """Download the photos of *celebrity* into the directory *path*.
+
+    ``celebrity.photos()`` must yield ``(refer, photo_url)`` pairs; ``refer``
+    is sent as the ``Referer`` header of the image request. *path* is created
+    (with parents) when missing and becomes the process working directory.
+    Files that already exist are skipped, and a download that does not
+    return a success status is reported and skipped instead of being saved.
+    """
+    idx = 0
+    # makedirs instead of mkdir: the parent directories may not exist yet.
+    os.makedirs(path, exist_ok=True)
+    os.chdir(path)
+    for refer, photo_url in celebrity.photos():
+        name = os.path.basename(photo_url)
+        if os.path.exists(name):
+            print("pic {} exist skip".format(name))
+            continue
+        print("{}: saving {}".format(idx, name))
+        headers = {
+            "Referer": refer,
+            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36"
+        }
+        # No stream=True: the whole body is read through r.content anyway.
+        r = requests.get(photo_url, headers=headers)
+        if not r.ok:
+            # Never store an error body under the image name: the exists
+            # check above would then skip the broken file on every later run.
+            print("pic {} download failed with status {}".format(name, r.status_code))
+            continue
+        with open(name, "wb") as f:
+            f.write(r.content)
+        idx += 1
+    print("saving celebrity photo to {}".format(path))
+
+
+if __name__ == "__main__":
+    import sys
+
+    # An optional first command-line argument selects the celebrity; the
+    # previously hard-coded id stays as the default.
+    celebrity_id = sys.argv[1] if len(sys.argv) > 1 else "1335340"
+    celebrity = Celebrity(celebrity_id)
+    # Photos go to ~/Pictures/celebrity/<celebrity_id>.
+    path = os.path.join(os.path.expanduser("~"), "Pictures", "celebrity", celebrity_id)
+    get_celebrity(celebrity, path)
diff --git a/douban/celebrity.py b/douban/celebrity.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import requests
+from bs4 import BeautifulSoup
+
+
+class Celebrity:
+    """Photo listing of one celebrity on movie.douban.com."""
+
+    BASE_URL = "https://movie.douban.com/celebrity/{}/photos/"
+
+    def __init__(self, celebrity_id):
+        """Store the photos-page URL built from *celebrity_id*."""
+        self.url = Celebrity.BASE_URL.format(celebrity_id)
+
+    def photos(self):
+        """Yield ``(link, image_url)`` pairs for every listed photo.
+
+        Listing pages are fetched one after another, advancing the offset by
+        the number of entries found, until a page has no entries. ``link``
+        is the ``href`` of the entry's anchor; ``image_url`` is the entry's
+        image ``src`` with ``photo/thumb`` replaced by ``photo/raw``.
+        """
+        offset = 0
+        covers = self.__photos(offset)
+        while covers:
+            for cover in covers:
+                # Example URL variants for one picture:
+                # https://img3.doubanio.com/view/photo/thumb/public/p2156276775.jpg
+                # https://img3.doubanio.com/view/photo/raw/public/p2156276775.jpg
+                # https://img3.doubanio.com/view/photo/photo/public/p2156276775.webp
+                link = cover.a['href']
+                image_url = cover.img['src'].replace("photo/thumb", "photo/raw")
+                yield link, image_url
+            offset += len(covers)
+            covers = self.__photos(offset)
+
+    def __photos(self, start):
+        """Fetch the listing page at offset *start* and return its ``div.cover`` elements."""
+        query = {
+            "type": "C",
+            "start": start,
+            "sortby": "vote",
+            "size": "a",
+            "subtype": "a"
+        }
+        response = requests.get(self.url, params=query)
+        page = BeautifulSoup(response.text, "html.parser")
+        return page.find_all("div", class_="cover")