wallpaper_get.py
import requests
from lxml import html
from bs4 import BeautifulSoup as bs4
from urllib.parse import urlsplit
from sys import exit
import configparser
# setting up the config parser
config = configparser.ConfigParser()
config_file = "wp.conf"
config.read(config_file)
# grabbing the config items from the file and assigning them to module-level variables
# these will be used in the functions to download wallpapers
DIR = config.get('config', 'directory')
PAGES = config.get('config', 'depth')
CATEGORY = config.get('config', 'category')
RESOLUTE = config.get('config', 'resolution')
PURITY = config.get('config', 'nsfw')
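# An illustrative wp.conf (the section and key names come from the
# config.get() calls above; the values are only example placeholders):
#
#   [config]
#   directory = /home/user/wallpapers/
#   depth = 2
#   category = landscape
#   resolution = 1920x1080
#   nsfw = no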
# nsfw mapping to wallhaven purity code
# 100 = SFW
# 111 = NSFW
# 110 = Sketchy...?
if PURITY.lower() == 'yes':
    PCODE = '111'
else:
    # default to SFW for 'no' or any unrecognised value,
    # so PCODE is always defined
    PCODE = '100'
# First we need to grab a list of all the image sources
# this function uses the search term and grabs all the hrefs that point to the images
def grab_list_of_curated_urls(url):
    headers = {
        "User-Agent": "Mozilla/532.0 (Windows NT x.y; rv:10.0) Gecko/20100101 Firefox/10.0"
    }
    data = requests.get(url, headers=headers)
    if data.status_code == 200:
        webpage = html.fromstring(data.content)
        hrefs = webpage.xpath('//a/@href')
        # wallpaper detail-page links happen to be exactly 43 characters long,
        # so filter the hrefs on length to keep only those
        curated_list = [href for href in hrefs if len(href) == 43]
        direct_url = []
        for page_url in curated_list:
            grab_content = requests.get(page_url).content
            soup = bs4(grab_content, 'lxml')
            images = soup.find_all('img')
            for image in images:
                direct_url.append(image['src'])
        return direct_url
    else:
        print("Received an error code from the server. Please ensure all config values are correct and unquoted.")
        print(f"Error Code: {data.status_code}")
        print(f"Error Response:\n{data.content}")
        return False
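# For example, pointing this at a single search page (the query string mirrors
# the one built in main() below; the search term is illustrative):
#
#   grab_list_of_curated_urls("https://alpha.wallhaven.cc/search?q=landscape&page=1")
#
# returns a list of direct image sources, or False if the request failed.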
# this function grabs a single url and downloads it to the directory in the config
def requests_image(file_url, DIR):
    suffix_list = ['jpg', 'png']
    file_name = urlsplit(file_url)[2].split('/')[-1]
    file_suffix = file_name.split('.')[-1]
    i = requests.get(file_url)
    if file_suffix in suffix_list and i.status_code == requests.codes.ok:
        # DIR comes straight from the config, so it is expected to end with a slash
        with open(DIR + file_name, 'wb') as file:
            file.write(i.content)
        return True
    else:
        return False
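# For example (the file URL below is a made-up placeholder, not a real image):
#
#   requests_image("https://wallpapers.wallhaven.cc/full/wallhaven-12345.jpg", DIR)
#
# writes wallhaven-12345.jpg into DIR and returns True, or returns False for
# an unsupported suffix or a failed request.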
# this is a nice little function that gets the total number of pages for the category
# we will use this to tell the user when there are extra wallpapers available,
# or when their depth goes over the total number of pages
def total_pages_of_wallpapers(CATEGORY):
    url = f"https://alpha.wallhaven.cc/search?q={CATEGORY}"
    page_content = requests.get(url).content
    soup = bs4(page_content, 'html.parser')
    data = soup.find_all('div', class_='thumbs-container thumb-listing infinite-scroll')
    # the listing's h2 reads "current / total", so the total page count
    # is the part after the " / "
    pages = data[0].h2.text
    total_pages = pages.split(' / ')[1]
    return total_pages
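# For example, total_pages_of_wallpapers("landscape") would return a string
# like "25" (both the search term and the count here are illustrative).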
# the main function which drives the program
def main(CATEGORY, RESOLUTE, PAGES, PCODE):
    print("\n\nCurrently downloading files; this takes roughly 20-30 seconds per page")
    print("Details below...\n")
    print(f"Category: {CATEGORY}")
    print(f"Depth: {PAGES}")
    print(f"Resolution: {RESOLUTE}")
    DOWNLOADS = []
    # range is end-exclusive, so add 1 to actually search PAGES pages
    for page in range(1, int(PAGES) + 1):
        url = f"https://alpha.wallhaven.cc/search?q={CATEGORY}&categories=111&purity={PCODE}&atleast={RESOLUTE}&sorting=relevance&order=desc&page={page}"
        image_list = grab_list_of_curated_urls(url)
        if not image_list:
            # grab_list_of_curated_urls already printed the error details
            continue
        for item in image_list:
            if 'full' in item:
                # the scraped sources are scheme-relative, so prepend https:
                full_url = f'https:{item}'
                DOWNLOADS.append(item)
                if not requests_image(full_url, DIR):
                    print(f"Failed to download: {full_url}")
    return DOWNLOADS
# I just do this because you are supposed to:
# the guard below means this block only runs when the script is executed
# directly from the command line, ie; not when imported as a python module
if __name__ == '__main__':
    TPN = int(total_pages_of_wallpapers(CATEGORY))
    PN = int(PAGES)
    if PN > TPN:
        print(f"Looks like the page number in your config [{PAGES}] is greater than the available pages [{TPN}]")
        print(f"Because the programmer is lazy, go set your depth to [{TPN}] :D")
        exit()
    elif PN < TPN:
        print("Just a little bit of info\n#######################################")
        print(f"Looks like you are searching {PAGES} out of {TPN} total pages")
        print(f"If you want more wallpapers just update your config depth to: {TPN}")
        main(CATEGORY, RESOLUTE, PAGES, PCODE)
    else:
        main(CATEGORY, RESOLUTE, PAGES, PCODE)