forked from s0md3v/Photon
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrequester.py
72 lines (65 loc) · 1.94 KB
/
requester.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import random
import time
import requests
from requests.exceptions import TooManyRedirects
SESSION = requests.Session()
SESSION.max_redirects = 3
def requester(
url,
main_url=None,
delay=0,
cook=None,
headers=None,
timeout=10,
host=None,
proxies=[None],
user_agents=[None],
failed=None,
processed=None
):
"""Handle the requests and return the response body."""
cook = cook or set()
headers = headers or set()
user_agents = user_agents or ['Photon']
failed = failed or set()
processed = processed or set()
# Mark the URL as crawled
processed.add(url)
# Pause/sleep the program for specified time
time.sleep(delay)
def make_request(url):
"""Default request"""
final_headers = headers or {
'Host': host,
# Selecting a random user-agent
'User-Agent': random.choice(user_agents),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip',
'DNT': '1',
'Connection': 'close',
}
try:
response = SESSION.get(
url,
cookies=cook,
headers=final_headers,
verify=False,
timeout=timeout,
stream=True,
proxies=random.choice(proxies)
)
except TooManyRedirects:
return 'dummy'
if 'text/html' in response.headers['content-type'] or \
'text/plain' in response.headers['content-type']:
if response.status_code != '404':
return response.text
else:
response.close()
failed.add(url)
return 'dummy'
else:
response.close()
return 'dummy'
return make_request(url)