Rotating proxies and socks5 support
s0md3v authored Apr 21, 2019
2 parents c7326a6 + f17bb53 commit 72dcceb
Showing 6 changed files with 64 additions and 28 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,8 @@
+#### v1.3.2
+- add support for socks proxies
+- add rotating proxies
+- `-p` now takes `IP:PORT` or `DOMAIN:PORT`, optionally prefixed with `http://` or `socks5://` (default is `http`), or a file with a list of proxies in the same format, one per line.
+
 #### v1.3.1
 - Added more intels (GENERIC_URL, BRACKET_URL, BACKSLASH_URL, HEXENCODED_URL, URLENCODED_URL, B64ENCODED_URL, IPV4, IPV6, EMAIL, MD5, SHA1, SHA256, SHA512, YARA_PARSE, CREDIT_CARD)
 - proxy support with `-p, --proxy` option (http proxy only)
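For illustration, a proxy list file handed to `-p` might look like the lines below (placeholder addresses); any line matching neither the `IP:PORT` nor the `DOMAIN:PORT` pattern is skipped with an "ignored" notice, and a bare entry with no scheme is treated as HTTP:

    http://127.0.0.1:8080
    socks5://127.0.0.1:9050
    10.0.0.5:3128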
2 changes: 1 addition & 1 deletion core/requester.py
@@ -52,7 +52,7 @@ def make_request(url):
             verify=False,
             timeout=timeout,
             stream=True,
-            proxies=proxies
+            proxies=random.choice(proxies)
         )
     except TooManyRedirects:
         return 'dummy'
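The rotation itself is just a uniform draw from a pool of requests-style proxy dicts. A minimal standalone sketch of the pattern, assuming placeholder proxy addresses you would swap for live ones:

    import random
    import requests

    # Pool in the shape proxy_type() builds: one dict per proxy, covering
    # both schemes. These addresses are placeholders, not real servers.
    proxies = [
        {'http': 'http://127.0.0.1:8080', 'https': 'http://127.0.0.1:8080'},
        {'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050'},
    ]

    # Each request draws a fresh proxy, spreading traffic across the pool
    # instead of funnelling everything through a single exit point.
    response = requests.get('http://example.com',
                            proxies=random.choice(proxies),
                            timeout=10)
    print(response.status_code)

When no `-p` is given, photon.py seeds the pool with a single None, so random.choice(proxies) yields None and requests falls back to a direct connection.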
50 changes: 34 additions & 16 deletions core/utils.py
@@ -1,4 +1,6 @@
+import requests
 import math
+import os.path
 import re
 import argparse
 
@@ -8,9 +10,6 @@
 from core.config import VERBOSE, BAD_TYPES
 
 from urllib.parse import urlparse
-from urllib.request import ProxyHandler, build_opener, install_opener, Request, urlopen
-import urllib.error
-
 
 
 def regxy(pattern, response, supress_regex, custom):
@@ -144,12 +143,35 @@ def top_level(url, fix_protocol=True):
     return toplevel
 
 
-def ProxyType(v):
+def is_proxy_list(v, proxies):
+    if os.path.isfile(v):
+        with open(v, 'r') as _file:
+            for line in _file:
+                line = line.strip()
+                if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", line) or \
+                        re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", line):
+                    proxies.append({"http": line,
+                                    "https": line})
+                else:
+                    print("%s ignored" % line)
+        if proxies:
+            return True
+    return False
+
+
+def proxy_type(v):
     """ Match IP:PORT or DOMAIN:PORT in a loose manner """
-    if re.match(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v):
-        return v
-    elif re.match(r"[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}", v.split(':')[0]):
-        return v
+    proxies = []
+    if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v):
+        proxies.append({"http": v,
+                        "https": v})
+        return proxies
+    elif re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", v):
+        proxies.append({"http": v,
+                        "https": v})
+        return proxies
+    elif is_proxy_list(v, proxies):
+        return proxies
     else:
         raise argparse.ArgumentTypeError(
             "Proxy should follow IP:PORT or DOMAIN:PORT format")
@@ -172,15 +194,11 @@ def luhn(purported):
 
 def is_good_proxy(pip):
     try:
-        proxy_handler = ProxyHandler(pip)
-        opener = build_opener(proxy_handler)
-        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
-        install_opener(opener)
-        # change the URL to test here
-        req = Request('http://www.example.com')
-        sock = urlopen(req, timeout=3)
-    except urllib.error.HTTPError as e:
+        requests.get('http://example.com', proxies=pip, timeout=3)
+    except requests.exceptions.ConnectTimeout as e:
         return False
     except Exception as detail:
         return False
 
     return True

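The health check now goes through requests too, so HTTP and SOCKS proxies are probed the same way the crawler will later use them. A hedged standalone equivalent for experimenting (test URL and proxy address are placeholders):

    import requests

    def check_proxy(proxy_dict, test_url='http://example.com', timeout=3):
        """Return True if a GET through the proxy completes in time."""
        try:
            # Any failure -- connect timeout, refused connection, bad proxy
            # URL -- counts as a dead proxy, mirroring the broad except above.
            requests.get(test_url, proxies=proxy_dict, timeout=timeout)
        except Exception:
            return False
        return True

    print(check_proxy({'http': '127.0.0.1:8080', 'https': '127.0.0.1:8080'}))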
7 changes: 5 additions & 2 deletions core/zap.py
@@ -1,5 +1,6 @@
 import re
 import requests
+import random
 
 from core.utils import verb, xml_parser
 from core.colors import run, good
@@ -20,7 +21,8 @@ def zap(input_url, archive, domain, host, internal, robots, proxies):
             verb('Internal page', url)
             internal.add(url)
     # Makes request to robots.txt
-    response = requests.get(input_url + '/robots.txt', proxies=proxies).text
+    response = requests.get(input_url + '/robots.txt',
+                            proxies=random.choice(proxies)).text
     # Making sure robots.txt isn't some fancy 404 page
     if '<body' not in response:
         # If you know it, you know it
@@ -40,7 +42,8 @@ def zap(input_url, archive, domain, host, internal, robots, proxies):
                 robots.add(url)
         print('%s URLs retrieved from robots.txt: %s' % (good, len(robots)))
     # Makes request to sitemap.xml
-    response = requests.get(input_url + '/sitemap.xml', proxies=proxies).text
+    response = requests.get(input_url + '/sitemap.xml',
+                            proxies=random.choice(proxies)).text
     # Making sure robots.txt isn't some fancy 404 page
     if '<body' not in response:
         matches = xml_parser(response)
27 changes: 18 additions & 9 deletions photon.py
@@ -10,6 +10,7 @@
 import sys
 import time
 import warnings
+import random
 
 from core.colors import good, info, run, green, red, white, end, bad

@@ -18,7 +19,7 @@
    / %s__%s \/ /_  ____  / /_____  ____
   / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\
  / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / /
-/_/   /_/ /_/\____/\__/\____/_/ /_/ %sv1.3.1%s\n''' %
+/_/   /_/ /_/\____/\__/\____/_/ /_/ %sv1.3.2%s\n''' %
  (red, white, red, white, red, white, red, white, red, white, red, white,
  red, white, end))

@@ -36,7 +37,7 @@
 from core.requester import requester
 from core.updater import updater
 from core.utils import (luhn,
-                        ProxyType,
+                        proxy_type,
                         is_good_proxy,
                         top_level,
                         extract_headers,
@@ -78,7 +79,7 @@
 parser.add_argument('--timeout', help='http request timeout', dest='timeout',
                     type=float)
 parser.add_argument('-p', '--proxy', help='Proxy server IP:PORT or DOMAIN:PORT', dest='proxies',
-                    type=ProxyType)
+                    type=proxy_type)
 
 # Switches
 parser.add_argument('--clone', help='clone the website locally', dest='clone',
@@ -122,13 +123,21 @@
 cook = args.cook or None  # Cookie
 api = bool(args.api)  # Extract high entropy strings i.e. API keys and stuff
 
-proxies = None
+proxies = []
 if args.proxies:
-    proxies = {"http": args.proxies,
-               "https": args.proxies}
-    if not is_good_proxy(proxies):
-        print("%s Proxy doesn't seem to work or timed out" % bad)
+    print("%s Testing proxies, can take a while..." % info)
+    for proxy in args.proxies:
+        if is_good_proxy(proxy):
+            proxies.append(proxy)
+        else:
+            print("%s Proxy %s doesn't seem to work or timed out" %
+                  (bad, proxy['http']))
+    print("%s Done" % info)
+    if not proxies:
+        print("%s No working proxies, quitting!" % bad)
         exit()
+else:
+    proxies.append(None)
 
 crawl_level = args.level or 2  # Crawling level
 thread_count = args.threads or 2  # Number of threads
@@ -164,7 +173,7 @@
     main_url = main_inp
 else:
     try:
-        requests.get('https://' + main_inp, proxies=proxies)
+        requests.get('https://' + main_inp, proxies=random.choice(proxies))
         main_url = 'https://' + main_inp
     except:
         main_url = 'http://' + main_inp
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
 requests
+requests[socks]
 urllib3
 tld
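The new `requests[socks]` extra pulls in PySocks, which is what makes requests accept `socks5://` proxy URLs in the first place. A minimal sketch, assuming a SOCKS5 proxy listens at the placeholder address:

    import requests

    # socks5:// resolves DNS locally; socks5h:// would push resolution
    # through the proxy as well.
    proxy = {'http': 'socks5://127.0.0.1:9050',
             'https': 'socks5://127.0.0.1:9050'}
    print(requests.get('http://example.com', proxies=proxy).status_code)

Without the extra installed, requests raises InvalidSchema ("Missing dependencies for SOCKS support") as soon as it sees a socks5:// proxy.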
