Rotating proxies and socks5 support
s0md3v authored Apr 21, 2019
2 parents c7326a6 + f17bb53 commit 72dcceb
Showing 6 changed files with 64 additions and 28 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,8 @@
+#### v1.3.2
+- add support for socks proxies
+- add rotating proxies
+- `-p` now takes `IP:PORT` or `DOMAIN:PORT`, optionally prefixed with `http://` or `socks5://` (default is `http`), or a file with a list of proxies in the same format, one per line.
+
 #### v1.3.1
 - Added more intels (GENERIC_URL, BRACKET_URL, BACKSLASH_URL, HEXENCODED_URL, URLENCODED_URL, B64ENCODED_URL, IPV4, IPV6, EMAIL, MD5, SHA1, SHA256, SHA512, YARA_PARSE, CREDIT_CARD)
 - proxy support with `-p, --proxy` option (http proxy only)
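For illustration, a proxy list file handed to `-p` might look like the lines below (placeholder addresses); any line matching neither the `IP:PORT` nor the `DOMAIN:PORT` pattern is skipped with an "ignored" notice, and a bare entry with no scheme is treated as HTTP:

    http://127.0.0.1:8080
    socks5://127.0.0.1:9050
    10.0.0.5:3128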
2 changes: 1 addition & 1 deletion core/requester.py
@@ -52,7 +52,7 @@ def make_request(url):
             verify=False,
             timeout=timeout,
             stream=True,
-            proxies=proxies
+            proxies=random.choice(proxies)
         )
     except TooManyRedirects:
         return 'dummy'
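The rotation itself is just a uniform draw from a pool of requests-style proxy dicts. A minimal standalone sketch of the pattern, assuming placeholder proxy addresses you would swap for live ones:

    import random
    import requests

    # Pool in the shape proxy_type() builds: one dict per proxy, covering
    # both schemes. These addresses are placeholders, not real servers.
    proxies = [
        {'http': 'http://127.0.0.1:8080', 'https': 'http://127.0.0.1:8080'},
        {'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050'},
    ]

    # Each request draws a fresh proxy, spreading traffic across the pool
    # instead of funnelling everything through a single exit point.
    response = requests.get('http://example.com',
                            proxies=random.choice(proxies),
                            timeout=10)
    print(response.status_code)

When no `-p` is given, photon.py seeds the pool with a single None, so random.choice(proxies) yields None and requests falls back to a direct connection.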
50 changes: 34 additions & 16 deletions core/utils.py
@@ -1,4 +1,6 @@
+import requests
 import math
+import os.path
 import re
 import argparse
 
@@ -8,9 +10,6 @@
 from core.config import VERBOSE, BAD_TYPES
 
 from urllib.parse import urlparse
-from urllib.request import ProxyHandler, build_opener, install_opener, Request, urlopen
-import urllib.error
-
 
 
 def regxy(pattern, response, supress_regex, custom):
@@ -144,12 +143,35 @@ def top_level(url, fix_protocol=True):
     return toplevel
 
 
-def ProxyType(v):
+def is_proxy_list(v, proxies):
+    if os.path.isfile(v):
+        with open(v, 'r') as _file:
+            for line in _file:
+                line = line.strip()
+                if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", line) or \
+                        re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", line):
+                    proxies.append({"http": line,
+                                    "https": line})
+                else:
+                    print("%s ignored" % line)
+        if proxies:
+            return True
+    return False
+
+
+def proxy_type(v):
     """ Match IP:PORT or DOMAIN:PORT in a loose manner """
-    if re.match(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v):
-        return v
-    elif re.match(r"[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}", v.split(':')[0]):
-        return v
+    proxies = []
+    if re.match(r"((http|socks5):\/\/.)?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})", v):
+        proxies.append({"http": v,
+                        "https": v})
+        return proxies
+    elif re.match(r"((http|socks5):\/\/.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}:(\d{1,5})", v):
+        proxies.append({"http": v,
+                        "https": v})
+        return proxies
+    elif is_proxy_list(v, proxies):
+        return proxies
     else:
         raise argparse.ArgumentTypeError(
             "Proxy should follow IP:PORT or DOMAIN:PORT format")
@@ -172,15 +194,11 @@ def luhn(purported):
 
 def is_good_proxy(pip):
     try:
-        proxy_handler = ProxyHandler(pip)
-        opener = build_opener(proxy_handler)
-        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
-        install_opener(opener)
-        # change the URL to test here
-        req = Request('http://www.example.com')
-        sock = urlopen(req, timeout=3)
-    except urllib.error.HTTPError as e:
+        requests.get('http://example.com', proxies=pip, timeout=3)
+    except requests.exceptions.ConnectTimeout as e:
         return False
     except Exception as detail:
         return False
 
     return True

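The health check now goes through requests too, so HTTP and SOCKS proxies are probed the same way the crawler will later use them. A hedged standalone equivalent for experimenting (test URL and proxy address are placeholders):

    import requests

    def check_proxy(proxy_dict, test_url='http://example.com', timeout=3):
        """Return True if a GET through the proxy completes in time."""
        try:
            # Any failure -- connect timeout, refused connection, bad proxy
            # URL -- counts as a dead proxy, mirroring the broad except above.
            requests.get(test_url, proxies=proxy_dict, timeout=timeout)
        except Exception:
            return False
        return True

    print(check_proxy({'http': '127.0.0.1:8080', 'https': '127.0.0.1:8080'}))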
7 changes: 5 additions & 2 deletions core/zap.py
@@ -1,5 +1,6 @@
 import re
 import requests
+import random
 
 from core.utils import verb, xml_parser
 from core.colors import run, good
@@ -20,7 +21,8 @@ def zap(input_url, archive, domain, host, internal, robots, proxies):
             verb('Internal page', url)
             internal.add(url)
     # Makes request to robots.txt
-    response = requests.get(input_url + '/robots.txt', proxies=proxies).text
+    response = requests.get(input_url + '/robots.txt',
+                            proxies=random.choice(proxies)).text
     # Making sure robots.txt isn't some fancy 404 page
     if '<body' not in response:
         # If you know it, you know it
@@ -40,7 +42,8 @@ def zap(input_url, archive, domain, host, internal, robots, proxies):
                 robots.add(url)
         print('%s URLs retrieved from robots.txt: %s' % (good, len(robots)))
     # Makes request to sitemap.xml
-    response = requests.get(input_url + '/sitemap.xml', proxies=proxies).text
+    response = requests.get(input_url + '/sitemap.xml',
+                            proxies=random.choice(proxies)).text
     # Making sure robots.txt isn't some fancy 404 page
     if '<body' not in response:
         matches = xml_parser(response)
27 changes: 18 additions & 9 deletions photon.py
@@ -10,6 +10,7 @@
 import sys
 import time
 import warnings
+import random
 
 from core.colors import good, info, run, green, red, white, end, bad

@@ -18,7 +19,7 @@
    / %s__%s \/ /_  ____  / /_____  ____
   / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\
  / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / /
-/_/   /_/ /_/\____/\__/\____/_/ /_/ %sv1.3.1%s\n''' %
+/_/   /_/ /_/\____/\__/\____/_/ /_/ %sv1.3.2%s\n''' %
  (red, white, red, white, red, white, red, white, red, white, red, white,
  red, white, end))

@@ -36,7 +37,7 @@
 from core.requester import requester
 from core.updater import updater
 from core.utils import (luhn,
-                        ProxyType,
+                        proxy_type,
                         is_good_proxy,
                         top_level,
                         extract_headers,
@@ -78,7 +79,7 @@
 parser.add_argument('--timeout', help='http request timeout', dest='timeout',
                     type=float)
 parser.add_argument('-p', '--proxy', help='Proxy server IP:PORT or DOMAIN:PORT', dest='proxies',
-                    type=ProxyType)
+                    type=proxy_type)
 
 # Switches
 parser.add_argument('--clone', help='clone the website locally', dest='clone',
@@ -122,13 +123,21 @@
 cook = args.cook or None  # Cookie
 api = bool(args.api)  # Extract high entropy strings i.e. API keys and stuff
 
-proxies = None
+proxies = []
 if args.proxies:
-    proxies = {"http": args.proxies,
-               "https": args.proxies}
-    if not is_good_proxy(proxies):
-        print("%s Proxy doesn't seem to work or timed out" % bad)
+    print("%s Testing proxies, can take a while..." % info)
+    for proxy in args.proxies:
+        if is_good_proxy(proxy):
+            proxies.append(proxy)
+        else:
+            print("%s Proxy %s doesn't seem to work or timed out" %
+                  (bad, proxy['http']))
+    print("%s Done" % info)
+    if not proxies:
+        print("%s No working proxies, quitting!" % bad)
         exit()
+else:
+    proxies.append(None)
 
 crawl_level = args.level or 2  # Crawling level
 thread_count = args.threads or 2  # Number of threads
@@ -164,7 +173,7 @@
     main_url = main_inp
 else:
     try:
-        requests.get('https://' + main_inp, proxies=proxies)
+        requests.get('https://' + main_inp, proxies=random.choice(proxies))
         main_url = 'https://' + main_inp
     except:
         main_url = 'http://' + main_inp
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
 requests
+requests[socks]
 urllib3
 tld
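The new `requests[socks]` extra pulls in PySocks, which is what makes requests accept `socks5://` proxy URLs in the first place. A minimal sketch, assuming a SOCKS5 proxy listens at the placeholder address:

    import requests

    # socks5:// resolves DNS locally; socks5h:// would push resolution
    # through the proxy as well.
    proxy = {'http': 'socks5://127.0.0.1:9050',
             'https': 'socks5://127.0.0.1:9050'}
    print(requests.get('http://example.com', proxies=proxy).status_code)

Without the extra installed, requests raises InvalidSchema ("Missing dependencies for SOCKS support") as soon as it sees a socks5:// proxy.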
