import json
import re
import urllib.request as urlr
import urllib.parse as urlp

import checkers

@checkers.cache
def fetch_page(url, *, encoding=None, context=None, **kwargs):
    # Only forward the kwargs that urllib.request.Request accepts
    kwargs = {k: v for k, v in kwargs.items() if k in ('data', 'headers', 'method')}
    req = urlr.Request(url, **kwargs)
    with urlr.urlopen(req, context=context) as reply:
        if not (200 <= reply.status < 300):
            raise Exception("Can't load web page at URL {0}".format(url))
        if encoding is None:
            # Try automatic detection based on the Content-Type header
            content_type = reply.getheader('Content-Type') or ''
            parts = [p.strip() for p in content_type.split(';')]
            for p in parts:
                if p.lower().startswith('charset='):
                    encoding = p[len('charset='):]
                    break
        # No encoding could be determined, fall back to ASCII
        if encoding is None:
            encoding = "ascii"
        data = reply.read()
        # The fake "bytes" encoding asks for the raw, undecoded payload
        if encoding != "bytes":
            data = data.decode(encoding)
        return data
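
# A usage sketch for fetch_page (the URLs, header value, and variable names
# below are hypothetical; "bytes" is the fake encoding handled above):
#   html = fetch_page("https://example.com/downloads",
#                     headers={"User-Agent": "version-checker"})
#   raw = fetch_page("https://example.com/logo.png", encoding="bytes")
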
def scrape(version, *, url, filter_pattern, all_versions=False, case_insensitive=False, **kwargs):
    data = fetch_page(url, **kwargs)
    matches = re.finditer(filter_pattern, data)
    versions = [match.group('version') for match in matches]
    # Apply user filters, change the delimiter and sort
    versions = checkers.prepare_versions(versions, **kwargs)
    if len(versions) == 0:
        print("WARNING: no matching versions for {0}, pattern {1} with {2}".format(
            url, filter_pattern,
            checkers.describe_filter(**kwargs)))
        return True, 'none', "page URL: {0}".format(url)
    if case_insensitive:
        version = version.lower()
        versions = [v.lower() for v in versions]
    # A web page may not list every version, so only warn when all are expected
    try:
        current_idx = versions.index(version)
    except ValueError:
        if all_versions:
            print("WARNING: version {3} not in versions for {0}, pattern {1} and with {2}".format(
                url, filter_pattern,
                checkers.describe_filter(**kwargs), version))
        current_idx = -1
    latest = versions[0]
    return current_idx == 0, latest, "page URL: {0}".format(url)

checkers.register('scrape', scrape)
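
# A usage sketch for the 'scrape' checker (the URL and pattern are
# hypothetical; the named group 'version' is required by the code above):
#   up_to_date, latest, info = scrape(
#       "1.2.3",
#       url="https://example.com/releases",
#       filter_pattern=r'myapp-(?P<version>\d+(?:\.\d+)*)\.tar\.gz')
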
def apple_store(version, *, productid):
    if not isinstance(productid, str):
        productid = str(productid)
    url = "https://itunes.apple.com/lookup?id=" + productid
    # The reply is JSON
    data = fetch_page(url)
    data = json.loads(data)
    if ('resultCount' not in data or
            'results' not in data):
        raise Exception('Error while loading Apple information')
    count = data['resultCount']
    if count <= 0:
        raise Exception('Apple Product ID {0} not found'.format(productid))
    elif count > 1:
        print('WARNING: Apple Product ID {0} not unique, taking first'.format(productid))
    product = data['results'][0]
    if ('version' not in product or
            'trackName' not in product):
        raise Exception('No version found in Apple information')
    latest = product['version']
    return version == latest, latest, "product ID: {0}, product name: {1}".format(productid, product['trackName'])

checkers.register('apple store', apple_store)
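
# A usage sketch for the 'apple store' checker (the product ID below is
# hypothetical; real IDs come from App Store URLs):
#   up_to_date, latest, info = apple_store("2.0", productid=123456789)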