Skip to content

Commit

Permalink
Add movie
Browse files Browse the repository at this point in the history
Signed-off-by: wujiawei <wujiawei@xiaomi.com>
  • Loading branch information
wujiawei committed Aug 4, 2017
1 parent 06c38e6 commit 3ba69c4
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 0 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@ douban downloader
A simple Python script to download Douban albums and celebrity photos


Introduction
-----

目前支持的下载类型

1. 豆瓣相册,比如

douban-dl https://www.douban.com/photos/album/1641641224/

2. 影人相册图片

douban-dl https://movie.douban.com/celebrity/1335340/
douban-dl https://movie.douban.com/celebrity/1335340/photos

3. 用户所有相册

douban-dl https://www.douban.com/people/einverne/


Installation
------------

Expand Down
6 changes: 6 additions & 0 deletions douban/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from douban.douban_album_dl import get_album_by_id
from douban.douban_celebrity_dl import get_celebrity_by_id
from douban.movie import get_movie_by_id
from douban.people import People


Expand Down Expand Up @@ -51,6 +52,11 @@ def parse_url(url, path):
for album_id in people.albums():
get_album_by_id(album_id, os.path.join(path, album_id))
return
match = re.match(r'https?://movie.douban.com/subject/(\d+)', url)
if match:
movie_id = match.group(1)
get_movie_by_id(movie_id, path)
return
print("Not support this url yet")


Expand Down
2 changes: 2 additions & 0 deletions douban/album.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
import requests

Expand Down
66 changes: 66 additions & 0 deletions douban/movie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os

import requests
from bs4 import BeautifulSoup

from utils import file_utils


class Movie:
    """Photo listing of a single Douban movie.

    The listing is fetched page by page from ``BASE_URL`` formatted with
    the movie id, and exposed as a generator through :meth:`photos`.
    """

    BASE_URL = 'https://movie.douban.com/subject/{}/photos'
    # Browser-like User-Agent sent with every listing request.
    USER_AGENT = (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
    )
    # Default number of seconds to wait for the server before giving up.
    DEFAULT_TIMEOUT = 30

    def __init__(self, movie_id, timeout=DEFAULT_TIMEOUT):
        """Build the listing URL for ``movie_id``.

        Args:
            movie_id: Identifier substituted into ``BASE_URL``.
            timeout: Seconds passed to ``requests.get`` for each listing
                request. ``None`` disables the timeout (the previous,
                potentially blocking-forever behaviour).
        """
        self.url = Movie.BASE_URL.format(movie_id)
        self.timeout = timeout

    def photos(self):
        """Yield ``(link, image_url)`` pairs for every listed photo.

        ``link`` is the ``href`` of the cover's anchor and ``image_url`` is
        the cover image ``src`` with ``photo/thumb`` replaced by
        ``photo/raw`` (presumably the full-size variant -- confirm against
        the live site). Pages are requested until one comes back with no
        covers.

        Raises:
            requests.exceptions.RequestException: if a listing request
                fails or exceeds ``self.timeout``.
        """
        start = 0
        while True:
            covers = self.__photos(start)
            if not covers:
                break
            for cover in covers:
                yield (
                    cover.a['href'],
                    cover.img["src"].replace("photo/thumb", "photo/raw"),
                )
            # Advance the offset by however many covers this page held.
            start += len(covers)

    def __photos(self, start):
        """Fetch one listing page and return its ``div.cover`` elements.

        Args:
            start: Value sent as the ``start`` query parameter.
        """
        params = {
            'type': 'S',
            'start': start,
            'sortby': 'like',
            'size': 'a',
            'subtype': 'a'
        }
        headers = {'User-Agent': Movie.USER_AGENT}
        # A timeout keeps a stalled connection from hanging the generator.
        r = requests.get(
            self.url,
            params=params,
            headers=headers,
            timeout=self.timeout,
        )
        soup = BeautifulSoup(r.text, "html.parser")
        return soup.find_all("div", class_="cover")


def get_movie_by_id(mid, path):
    """Download every listed photo of movie ``mid`` into directory ``path``.

    The directory is created through ``file_utils.mkdir``. Each photo is
    stored under the base name of its URL; files that already exist are
    skipped, so an interrupted run can be resumed.

    Args:
        mid: Movie identifier handed to ``Movie``.
        path: Destination directory for the downloaded files.
    """
    # The User-Agent is the same for every request; only the Referer varies.
    user_agent = (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
    )
    saved = 0
    file_utils.mkdir(path)
    movie = Movie(mid)
    for refer, photo_url in movie.photos():
        name = os.path.basename(photo_url)
        # os.path.join copes with an empty or slash-terminated ``path``,
        # unlike manual '/' concatenation.
        full_path = os.path.join(path, name)
        if os.path.exists(full_path):
            print('pic {} exist skip'.format(name))
            continue
        print('{}: saving {}'.format(saved, name))
        headers = {
            # The page that links to the photo is sent as the Referer.
            'Referer': refer,
            "User-Agent": user_agent,
        }
        file_utils.save_from_url(photo_url, headers, full_path)
        saved += 1
    print('saving movie photos to {}'.format(path))


if __name__ == '__main__':
    # Manual smoke run against a fixed movie id and output directory.
    sample_movie_id = '4191644'
    sample_output_dir = '/home/mi/Pictures/419'
    get_movie_by_id(sample_movie_id, sample_output_dir)

0 comments on commit 3ba69c4

Please sign in to comment.