diff --git a/README.md b/README.md
index 17b3bd0..c3c3c19 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,25 @@ douban downloader
 A simple python script to download douban albums and celebrity
 
 
+Introduction
+-----
+
+目前支持的下载类型
+
+1. 豆瓣相册,比如
+
+    douban-dl https://www.douban.com/photos/album/1641641224/
+
+2. 影人相册图片
+
+    douban-dl https://movie.douban.com/celebrity/1335340/
+    douban-dl https://movie.douban.com/celebrity/1335340/photos
+
+3. 用户所有相册
+
+    douban-dl https://www.douban.com/people/einverne/
+
+
 Installation
 ------------
 
diff --git a/douban/__main__.py b/douban/__main__.py
index 297db3b..814215c 100644
--- a/douban/__main__.py
+++ b/douban/__main__.py
@@ -6,6 +6,7 @@
 
 from douban.douban_album_dl import get_album_by_id
 from douban.douban_celebrity_dl import get_celebrity_by_id
+from douban.movie import get_movie_by_id
 from douban.people import People
 
 
@@ -51,6 +52,11 @@ def parse_url(url, path):
         for album_id in people.albums():
             get_album_by_id(album_id, os.path.join(path, album_id))
         return
+    match = re.match(r'https?://movie.douban.com/subject/(\d+)', url)
+    if match:
+        movie_id = match.group(1)
+        get_movie_by_id(movie_id, path)
+        return
     print("Not support this url yet")
 
 
diff --git a/douban/album.py b/douban/album.py
index c0fcdac..8f2ffd4 100644
--- a/douban/album.py
+++ b/douban/album.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
 from bs4 import BeautifulSoup
 import requests
 
diff --git a/douban/movie.py b/douban/movie.py
new file mode 100644
index 0000000..7a4553a
--- /dev/null
+++ b/douban/movie.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Download the photos listed on a Douban movie subject page."""
+import os
+
+import requests
+from bs4 import BeautifulSoup
+
+from utils import file_utils
+
+# Browser-like User-Agent sent with every request issued from this module.
+USER_AGENT = (
+    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
+    '(KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
+)
+# Seconds to wait for a listing page; without it requests can block forever.
+REQUEST_TIMEOUT = 30
+
+
+class Movie:
+    """Photo listing of one movie, addressed by its Douban subject id."""
+
+    BASE_URL = 'https://movie.douban.com/subject/{}/photos'
+
+    def __init__(self, movie_id):
+        """Build the photo listing URL for ``movie_id``."""
+        self.url = Movie.BASE_URL.format(movie_id)
+
+    def photos(self):
+        """Yield ``(page_url, image_url)`` for every photo of the movie.
+
+        ``page_url`` is the link found on the cover and ``image_url`` is the
+        thumbnail source with ``photo/thumb`` replaced by ``photo/raw``.
+        Listing pages are requested in turn until one contains no covers.
+        """
+        start = 0
+        while True:
+            covers = self.__photos(start)
+            if not covers:
+                break
+            for cover in covers:
+                yield (cover.a['href'],
+                       cover.img['src'].replace('photo/thumb', 'photo/raw'))
+            # The next page starts after the covers this page returned.
+            start += len(covers)
+
+    def __photos(self, start):
+        """Return the ``div.cover`` elements of the page at offset ``start``."""
+        params = {
+            'type': 'S',
+            'start': start,
+            'sortby': 'like',
+            'size': 'a',
+            'subtype': 'a',
+        }
+        r = requests.get(self.url, params=params,
+                         headers={'User-Agent': USER_AGENT},
+                         timeout=REQUEST_TIMEOUT)
+        soup = BeautifulSoup(r.text, "html.parser")
+        return soup.find_all("div", class_="cover")
+
+
+def get_movie_by_id(mid, path):
+    """Save the photos of movie ``mid`` into the directory ``path``.
+
+    A photo whose file name already exists under ``path`` is skipped.
+    """
+    idx = 0
+    file_utils.mkdir(path)
+    m = Movie(mid)
+    for refer, photo_url in m.photos():
+        name = os.path.basename(photo_url)
+        # Join with os.path so the separator matches the platform.
+        full_path = os.path.join(path, name)
+        if os.path.exists(full_path):
+            print('pic {} exist skip'.format(name))
+            continue
+        print('{}: saving {}'.format(idx, name))
+        headers = {
+            'Referer': refer,
+            'User-Agent': USER_AGENT,
+        }
+        file_utils.save_from_url(photo_url, headers, full_path)
+        idx += 1
+    print('saving movie photos to {}'.format(path))
+
+
+if __name__ == '__main__':
+    get_movie_by_id('4191644', '/home/mi/Pictures/419')