Add douban people support

Supports user photo pages like https://www.douban.com/people/<douban_id>/photos

Signed-off-by: Ein Verne <einverne@gmail.com>
einverne · Aug 3, 2017 · 3d3a739 · 3d3a739
1 parent fe58e98
commit 3d3a739
Show file tree

Hide file tree

Showing 4 changed files with 62 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -19,6 +19,7 @@ Usage
 
     https://www.douban.com/photos/album/<album_id>
     https://movie.douban.com/celebrity/<celebrity_id>
+    https://www.douban.com/people/<douban_id>/photos
 
 `path` is the folder where images saved, defaults to `./douban`.
 

diff --git a/douban/__main__.py b/douban/__main__.py
@@ -7,6 +7,7 @@
 from douban.celebrity import Celebrity
 from douban.douban_album_dl import get_album
 from douban.douban_celebrity_dl import get_celebrity
+from douban.people import People
 
 
 def get_args():
@@ -29,21 +30,30 @@ def parse_url(url, path):
     """
     https://www.douban.com/photos/album/<album_id>
     https://movie.douban.com/celebrity/<celebrity_id>
+    https://www.douban.com/people/<douban_id>/photos
     :param url:
     :return:
     """
-    match = re.match(r'(http|https)://www.douban.com/photos/album/(\d+)', url)
+    match = re.match(r'https?://www.douban.com/photos/album/(\d+)', url)
     if match:
-        album_id = match.group(2)
+        album_id = match.group(1)
         album = Album(album_id)
         get_album(album, path)
         return
-    match = re.match(r'(http|https)://movie.douban.com/celebrity/(\d+)', url)
+    match = re.match(r'https?://movie.douban.com/celebrity/(\d+)', url)
     if match:
-        celebrity_id = match.group(2)
+        celebrity_id = match.group(1)
         celebrity = Celebrity(celebrity_id)
         get_celebrity(celebrity, path)
         return
+    match = re.match(r'https?://www.douban.com/people/(\w+)(/|/photos)', url)
+    if match:
+        people_id = match.group(1)
+        people = People(people_id)
+        for album_id in people.albums():
+            a = Album(album_id)
+            get_album(a, path + '/' + album_id)
+        return
     print("Not support this url yet")
 
 

diff --git a/douban/douban_album_dl.py b/douban/douban_album_dl.py
@@ -15,6 +15,7 @@
 
 def get_album(album, path):
     idx = 0
+    pwd = os.getcwd()
     file_utils.mkdir(path)
     os.chdir(path)
     for photo_url in album.photos():
@@ -23,3 +24,4 @@ def get_album(album, path):
         file_utils.save_from_url(photo_url, headers, name)
         idx += 1
     print("saving album to {}, total {} images".format(path, idx))
+    os.chdir(pwd)
diff --git a/douban/people.py b/douban/people.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import re
+import requests
+
+
+class People:
+    """Album listing for a single Douban user.
+
+    Requests ``https://www.douban.com/people/<user_id>/photos?start=<n>``
+    page by page and extracts the album ids linked from each page.
+    """
+
+    BASE_URL = 'https://www.douban.com/people/'
+    # Seconds to wait on a page request; without a timeout requests.get
+    # may block forever on a stalled connection.
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self, user_id):
+        """Store the user id and build the paginated photos URL prefix.
+
+        :param user_id: id segment of the user's profile URL
+        """
+        self.uid = user_id
+        self.url = People.BASE_URL + '{}/photos?start='.format(user_id)
+
+    def albums(self):
+        """Yield every album id found on the user's photo pages, once each.
+
+        The ``start`` offset advances by the number of distinct album ids
+        found on the page just read. Iteration ends when a page contributes
+        no id that has not been yielded before, which covers both an empty
+        page and a server that keeps returning a page already seen.
+
+        :return: generator of album id strings, in page order
+        """
+        seen = set()
+        start = 0
+        while True:
+            page_ids = self.__album(start)
+            fresh_ids = [album_id for album_id in page_ids
+                         if album_id not in seen]
+            if not fresh_ids:
+                break
+            for album_id in fresh_ids:
+                seen.add(album_id)
+                yield album_id
+            start += len(page_ids)
+
+    def __album(self, start):
+        """Fetch one listing page and return the album ids linked from it.
+
+        :param start: value for the ``start`` query parameter
+        :return: list of distinct album id strings in order of appearance
+        """
+        headers = {
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+            # 'br' is intentionally not advertised: the response can only be
+            # decoded when an optional Brotli package is installed, and an
+            # undecoded body would make the regex below find nothing.
+            'Accept-Encoding': 'gzip, deflate',
+            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4',
+            'Connection': 'keep-alive',
+            'DNT': '1',
+            'HOST': 'www.douban.com',
+            'Referer': People.BASE_URL + self.uid,
+            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
+        }
+        url = self.url + str(start)
+        r = requests.get(url, headers=headers, timeout=People.REQUEST_TIMEOUT)
+        # Dots are escaped so only the literal host name matches.
+        found = re.findall(
+            r'https?://www\.douban\.com/photos/album/(\d+)', r.text)
+        # De-duplicate while keeping the order in which ids appear on the
+        # page, so callers process albums in a stable order.
+        ordered = []
+        for album_id in found:
+            if album_id not in ordered:
+                ordered.append(album_id)
+        return ordered
+
+
+if __name__ == '__main__':
+    # Manual smoke run: print each album id found for a sample user.
+    sample_user = People('LordBean')
+    for found_id in sample_user.albums():
+        print(found_id)