From d7c653c1ae6f49456033b14be9be929bc488a22e Mon Sep 17 00:00:00 2001 From: oXis Date: Mon, 8 Apr 2019 16:00:47 +0100 Subject: [PATCH 1/7] add rotating proxies and socks support --- CHANGELOG.md | 5 ++++ core/requester.py | 2 +- core/utils.py | 65 ++++++++++++++++++++++++++++++++++++----------- core/zap.py | 7 +++-- photon.py | 23 ++++++++++++----- 5 files changed, 77 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f5a0a6..48bc6bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +#### v1.3.2 +- add support for socks proxies +- add rotating proxies +- `-p` now takes `IP:PORT` or `DOMAIN:PORT` with `http://`, `socks5://` or nothing (default is `http`) or a `file` with a list of proxies (`http://` or `socks5://` or nothing) or `generate`. Generate will pull http proxies from the web. + #### v1.3.1 - Added more intels (GENERIC_URL, BRACKET_URL, BACKSLASH_URL, HEXENCODED_URL, URLENCODED_URL, B64ENCODED_URL, IPV4, IPV6, EMAIL, MD5, SHA1, SHA256, SHA512, YARA_PARSE, CREDIT_CARD) - proxy support with `-p, --proxy` option (http proxy only) diff --git a/core/requester.py b/core/requester.py index 11cd8f1..69f921c 100644 --- a/core/requester.py +++ b/core/requester.py @@ -52,7 +52,7 @@ def make_request(url): verify=False, timeout=timeout, stream=True, - proxies=proxies + proxies=random.choice(proxies) ) except TooManyRedirects: return 'dummy' diff --git a/core/utils.py b/core/utils.py index cb32ff2..9619b60 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,4 +1,6 @@ +import requests import math +import os.path import re import argparse @@ -8,9 +10,6 @@ from core.config import VERBOSE, BAD_TYPES from urllib.parse import urlparse -from urllib.request import ProxyHandler, build_opener, install_opener, Request, urlopen -import urllib.error - def regxy(pattern, response, supress_regex, custom): @@ -144,12 +143,52 @@ def top_level(url, fix_protocol=True): return toplevel +def get_proxies_from_web(): + reponse = requests.get("https://free-proxy-list.net/anonymous-proxy.html") + + regex = re.compile( + r'([0-9]+.[0-9]+.[0-9]+.[0-9]+)([0-9]+)') + + proxy_pool = [] + for match in regex.findall(reponse.text): + proxy = f"{match[0]}:{match[1]}" + proxy_pool.append({"http": proxy, + "https": proxy}) + + return proxy_pool + + +def is_proxy_list(v, proxies): + if os.path.isfile(v): + with open(v, 'r') as _file: + for line in _file: + line = line.strip() + if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", line) or \ + re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", line): + proxies.append({"http": line, + "https": line}) + else: + print("%s ignored" % line) + if proxies: + return True + return False + + def ProxyType(v): """ Match IP:PORT or DOMAIN:PORT in a losse manner """ - if re.match(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v): - return v - elif re.match(r"[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}", v.split(':')[0]): - return v + proxies = [] + if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v): + proxies.append({"http": v, + "https": v}) + return proxies + elif re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", v): + proxies.append({"http": v, + "https": v}) + return proxies + elif is_proxy_list(v, proxies): + return proxies + elif v == "generate": + return get_proxies_from_web() else: raise argparse.ArgumentTypeError( "Proxy should follow IP:PORT or DOMAIN:PORT format") @@ -172,15 +211,11 @@ def luhn(purported): def is_good_proxy(pip): try: - proxy_handler = ProxyHandler(pip) - opener = build_opener(proxy_handler) - opener.addheaders = [('User-agent', 'Mozilla/5.0')] - install_opener(opener) - # change the URL to test here - req = Request('http://www.example.com') - sock = urlopen(req, timeout=3) - except urllib.error.HTTPError as e: + requests.get('http://example.com', proxies=pip, timeout=3) + except requests.exceptions.ConnectTimeout as e: return False except Exception as detail: return False + return True + diff --git a/core/zap.py b/core/zap.py index d300374..3095e56 100644 --- a/core/zap.py +++ b/core/zap.py @@ -1,5 +1,6 @@ import re import requests +import random from core.utils import verb, xml_parser from core.colors import run, good @@ -20,7 +21,8 @@ def zap(input_url, archive, domain, host, internal, robots, proxies): verb('Internal page', url) internal.add(url) # Makes request to robots.txt - response = requests.get(input_url + '/robots.txt', proxies=proxies).text + response = requests.get(input_url + '/robots.txt', + proxies=random.choice(proxies)).text # Making sure robots.txt isn't some fancy 404 page if ' Date: Mon, 8 Apr 2019 16:01:22 +0100 Subject: [PATCH 2/7] socks --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 84aed2c..19be6bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests +requests[socks] urllib3 tld \ No newline at end of file From 5e904f33efd4f0e625cc39423db08ecac58bb097 Mon Sep 17 00:00:00 2001 From: oXis Date: Mon, 8 Apr 2019 16:04:38 +0100 Subject: [PATCH 3/7] quit at the correct place.... --- photon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/photon.py b/photon.py index e89cba2..197716c 100644 --- a/photon.py +++ b/photon.py @@ -135,9 +135,9 @@ print("%s Done" % info) if not proxies: print("%s no working proxies, quitting!" % bad) + exit() else: proxies.append(None) - exit() crawl_level = args.level or 2 # Crawling level thread_count = args.threads or 2 # Number of threads From 411a6704f2dcbd2a2688e4b4020b26a50a78d5bf Mon Sep 17 00:00:00 2001 From: oXis Date: Mon, 8 Apr 2019 16:12:32 +0100 Subject: [PATCH 4/7] bump --- photon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/photon.py b/photon.py index 197716c..7eb5e5d 100644 --- a/photon.py +++ b/photon.py @@ -19,7 +19,7 @@ / %s__%s \/ /_ ____ / /_____ ____ / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\ / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / / - /_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.3.1%s\n''' % + /_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.3.2%s\n''' % (red, white, red, white, red, white, red, white, red, white, red, white, red, white, end)) From a5616abed7e247d44ea593f0ddc52e22ea5f8a49 Mon Sep 17 00:00:00 2001 From: oXis Date: Mon, 8 Apr 2019 16:19:12 +0100 Subject: [PATCH 5/7] generate no more --- core/utils.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/core/utils.py b/core/utils.py index 9619b60..e4cc7e3 100644 --- a/core/utils.py +++ b/core/utils.py @@ -143,21 +143,6 @@ def top_level(url, fix_protocol=True): return toplevel -def get_proxies_from_web(): - reponse = requests.get("https://free-proxy-list.net/anonymous-proxy.html") - - regex = re.compile( - r'([0-9]+.[0-9]+.[0-9]+.[0-9]+)([0-9]+)') - - proxy_pool = [] - for match in regex.findall(reponse.text): - proxy = f"{match[0]}:{match[1]}" - proxy_pool.append({"http": proxy, - "https": proxy}) - - return proxy_pool - - def is_proxy_list(v, proxies): if os.path.isfile(v): with open(v, 'r') as _file: @@ -183,12 +168,10 @@ def ProxyType(v): return proxies elif re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", v): proxies.append({"http": v, - "https": v}) + "https": v}) return proxies elif is_proxy_list(v, proxies): return proxies - elif v == "generate": - return get_proxies_from_web() else: raise argparse.ArgumentTypeError( "Proxy should follow IP:PORT or DOMAIN:PORT format") From 0bbfa32a80d4c5728a2ab5238565ea117da3128e Mon Sep 17 00:00:00 2001 From: oXis Date: Mon, 8 Apr 2019 16:19:48 +0100 Subject: [PATCH 6/7] generate no more --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48bc6bf..cc5fdad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ #### v1.3.2 - add support for socks proxies - add rotating proxies -- `-p` now takes `IP:PORT` or `DOMAIN:PORT` with `http://`, `socks5://` or nothing (default is `http`) or a `file` with a list of proxies (`http://` or `socks5://` or nothing) or `generate`. Generate will pull http proxies from the web. +- `-p` now takes `IP:PORT` or `DOMAIN:PORT` with `http://`, `socks5://` or nothing (default is `http`) or a `file` with a list of proxies (`http://` or `socks5://` or nothing). #### v1.3.1 - Added more intels (GENERIC_URL, BRACKET_URL, BACKSLASH_URL, HEXENCODED_URL, URLENCODED_URL, B64ENCODED_URL, IPV4, IPV6, EMAIL, MD5, SHA1, SHA256, SHA512, YARA_PARSE, CREDIT_CARD) From f17bb53a268d4507a3b8983ee9e02bda56d1b0d5 Mon Sep 17 00:00:00 2001 From: oXis Date: Mon, 8 Apr 2019 16:21:11 +0100 Subject: [PATCH 7/7] lower case proxy_type --- core/utils.py | 2 +- photon.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/utils.py b/core/utils.py index e4cc7e3..c6b5aa3 100644 --- a/core/utils.py +++ b/core/utils.py @@ -159,7 +159,7 @@ def is_proxy_list(v, proxies): return False -def ProxyType(v): +def proxy_type(v): """ Match IP:PORT or DOMAIN:PORT in a losse manner """ proxies = [] if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v): diff --git a/photon.py b/photon.py index 7eb5e5d..455999e 100644 --- a/photon.py +++ b/photon.py @@ -37,7 +37,7 @@ from core.requester import requester from core.updater import updater from core.utils import (luhn, - ProxyType, + proxy_type, is_good_proxy, top_level, extract_headers, @@ -79,7 +79,7 @@ parser.add_argument('--timeout', help='http request timeout', dest='timeout', type=float) parser.add_argument('-p', '--proxy', help='Proxy server IP:PORT or DOMAIN:PORT', dest='proxies', - type=ProxyType) + type=proxy_type) # Switches parser.add_argument('--clone', help='clone the website locally', dest='clone',