Add movie

Signed-off-by: wujiawei <wujiawei@xiaomi.com>
einverne · Aug 4, 2017 · 3ba69c4 · 3ba69c4
1 parent 06c38e6
commit 3ba69c4
Show file tree

Hide file tree

Showing 4 changed files with 93 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -4,6 +4,25 @@ douban downloader
 A simple python script to download douban albums and celebrity
 
 
+Introduction
+-----
+
+目前支持的下载类型
+
+1. 豆瓣相册，比如
+
+    douban-dl https://www.douban.com/photos/album/1641641224/
+
+2. 影人相册图片
+
+    douban-dl https://movie.douban.com/celebrity/1335340/
+    douban-dl https://movie.douban.com/celebrity/1335340/photos
+
+3. 用户所有相册
+
+    douban-dl https://www.douban.com/people/einverne/
+
+
 Installation
 ------------
 

diff --git a/douban/__main__.py b/douban/__main__.py
@@ -6,6 +6,7 @@
 
 from douban.douban_album_dl import get_album_by_id
 from douban.douban_celebrity_dl import get_celebrity_by_id
+from douban.movie import get_movie_by_id
 from douban.people import People
 
 
@@ -51,6 +52,11 @@ def parse_url(url, path):
         for album_id in people.albums():
             get_album_by_id(album_id, os.path.join(path, album_id))
         return
+    match = re.match(r'https?://movie.douban.com/subject/(\d+)', url)
+    if match:
+        movie_id = match.group(1)
+        get_movie_by_id(movie_id, path)
+        return
     print("Not support this url yet")
 
 

diff --git a/douban/album.py b/douban/album.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
 from bs4 import BeautifulSoup
 import requests
 

diff --git a/douban/movie.py b/douban/movie.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+
+import requests
+from bs4 import BeautifulSoup
+
+from utils import file_utils
+
+
+class Movie:
+    """Lazily enumerate the photos listed on a Douban movie's photo pages.
+
+    The listing URL is ``BASE_URL`` formatted with the movie id; pages are
+    requested one at a time while :meth:`photos` is being consumed.
+    """
+
+    BASE_URL = 'https://movie.douban.com/subject/{}/photos'
+    # Seconds to wait for the listing page. Without an explicit timeout
+    # ``requests`` may block indefinitely on a stalled connection.
+    TIMEOUT = 30
+
+    def __init__(self, movie_id):
+        """Store the photo-listing URL for ``movie_id``."""
+        self.url = Movie.BASE_URL.format(movie_id)
+
+    def photos(self):
+        """Yield ``(page_url, image_url)`` for each photo in the listing.
+
+        ``page_url`` is the ``href`` of the cover's link and ``image_url`` is
+        the cover image's ``src`` with ``photo/thumb`` replaced by
+        ``photo/raw``. Iteration ends at the first page that contains no
+        ``div.cover`` element.
+        """
+        start = 0
+        while True:
+            covers = self.__photos(start)
+            if not covers:
+                break
+            for cover in covers:
+                link = cover.a
+                img = cover.img
+                # Skip a cover that lacks a link or an image instead of
+                # aborting the whole download with a TypeError/KeyError.
+                if link is None or img is None:
+                    continue
+                href = link.get('href')
+                src = img.get('src')
+                if not href or not src:
+                    continue
+                yield href, src.replace("photo/thumb", "photo/raw")
+            # Advance by the number of covers the page held, usable or not,
+            # so the next request starts right after this page.
+            start += len(covers)
+
+    def __photos(self, start):
+        """Fetch one listing page and return its ``div.cover`` elements.
+
+        ``start`` is sent as the ``start`` query parameter (the paging
+        offset). The remaining query values are fixed filters; their exact
+        meaning is defined by the site -- NOTE(review): confirm before
+        changing them.
+        """
+        url = self.url
+        params = {
+            'type': 'S',
+            'start': start,
+            'sortby': 'like',
+            'size': 'a',
+            'subtype': 'a'
+        }
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
+        }
+        r = requests.get(url, params=params, headers=headers,
+                         timeout=Movie.TIMEOUT)
+        soup = BeautifulSoup(r.text, "html.parser")
+        return soup.find_all("div", class_="cover")
+
+
+def get_movie_by_id(mid, path):
+    """Download the photos of movie ``mid`` into the directory ``path``.
+
+    ``file_utils.mkdir(path)`` is called first (presumably creating the
+    directory -- verify against ``file_utils``). Each photo is stored under
+    the last path component of its image URL; a file that already exists is
+    reported and skipped, so an interrupted run can be resumed.
+
+    :param mid: Douban movie id, as used in ``/subject/<id>/``.
+    :param path: destination directory for the image files.
+    """
+    idx = 0  # counts files actually saved; skipped files are not counted
+    file_utils.mkdir(path)
+    m = Movie(mid)
+    for refer, photo_url in m.photos():
+        name = os.path.basename(photo_url)
+        # Use os.path.join rather than a hand-written '/' so the separator
+        # is correct on every platform and never doubled.
+        full_path = os.path.join(path, name)
+        if os.path.exists(full_path):
+            print('pic {} exist skip'.format(name))
+            continue
+        print('{}: saving {}'.format(idx, name))
+        # The photo's own page URL is sent as Referer with the image request
+        # (presumably required by the image host -- TODO confirm).
+        headers = {
+            'Referer': refer,
+            "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
+        }
+        file_utils.save_from_url(photo_url, headers, full_path)
+        idx += 1
+    print('saving movie photos to {}'.format(path))
+
+
+if __name__ == '__main__':
+    # Manual smoke run: ``python movie.py [movie_id [target_dir]]``.
+    # The defaults are the sample values this script was originally run
+    # with, so invoking it without arguments behaves exactly as before.
+    import sys
+
+    cli_args = sys.argv[1:]
+    movie_id = cli_args[0] if len(cli_args) > 0 else '4191644'
+    target_dir = cli_args[1] if len(cli_args) > 1 else '/home/mi/Pictures/419'
+    get_movie_by_id(movie_id, target_dir)