Skip to content

Commit

Permalink
[update] 更新代理IP抓取
Browse files Browse the repository at this point in the history
  • Loading branch information
jhao104 committed Feb 18, 2019
1 parent 086074c commit 792fd13
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 33 deletions.
10 changes: 5 additions & 5 deletions Config/setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,20 @@
PROXY_GETTER = [
"freeProxyFirst",
"freeProxySecond",
# "freeProxyThird",
# "freeProxyThird", # 网站已不能访问
"freeProxyFourth",
"freeProxyFifth",
# "freeProxySixth"
# "freeProxySixth" # 不再提供免费代理
"freeProxySeventh",
# "freeProxyEight",
# "freeProxyNinth",
"freeProxyTen",
"freeProxyEleven",
"freeProxyTwelve",
# foreign website, outside the wall
"freeProxyWallFirst",
"freeProxyWallSecond",
"freeProxyWallThird"
# "freeProxyWallFirst",
# "freeProxyWallSecond",
# "freeProxyWallThird"
]


Expand Down
2 changes: 0 additions & 2 deletions ProxyGetter/CheckProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@
"""
__author__ = 'JHao'

import sys
from getFreeProxy import GetFreeProxy
from Util.utilFunction import verifyProxyFormat

sys.path.append('../')

from Util.LogHandler import LogHandler

Expand Down
28 changes: 13 additions & 15 deletions ProxyGetter/getFreeProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def freeProxyThird(days=1):
pass

@staticmethod
def freeProxyFourth(page_count=2):
def freeProxyFourth(page_count=1):
"""
西刺代理 http://www.xicidaili.com
:return:
Expand Down Expand Up @@ -136,7 +136,7 @@ def freeProxyFifth():
@staticmethod
def freeProxySixth():
"""
讯代理 http://www.xdaili.cn/
讯代理 http://www.xdaili.cn/ 已停用
:return:
"""
url = 'http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10'
Expand All @@ -154,21 +154,19 @@ def freeProxySeventh():
快代理 https://www.kuaidaili.com
"""
url_list = [
'https://www.kuaidaili.com/free/inha/{page}/',
'https://www.kuaidaili.com/free/intr/{page}/'
'https://www.kuaidaili.com/free/inha/',
'https://www.kuaidaili.com/free/intr/'
]
for url in url_list:
for page in range(1, 2):
page_url = url.format(page=page)
tree = getHtmlTree(page_url)
proxy_list = tree.xpath('.//table//tr')
for tr in proxy_list[1:]:
yield ':'.join(tr.xpath('./td/text()')[0:2])
tree = getHtmlTree(url)
proxy_list = tree.xpath('.//table//tr')
for tr in proxy_list[1:]:
yield ':'.join(tr.xpath('./td/text()')[0:2])

@staticmethod
def freeProxyEight():
"""
秘密代理 http://www.mimiip.com 不能用
秘密代理 http://www.mimiip.com 已停用
"""
url_gngao = ['http://www.mimiip.com/gngao/%s' % n for n in range(1, 2)] # 国内高匿
url_gnpu = ['http://www.mimiip.com/gnpu/%s' % n for n in range(1, 2)] # 国内普匿
Expand All @@ -185,7 +183,7 @@ def freeProxyEight():
@staticmethod
def freeProxyNinth():
"""
码农代理 https://proxy.coderbusy.com/ 不能用
码农代理 https://proxy.coderbusy.com/ 已停用
:return:
"""
urls = ['https://proxy.coderbusy.com/classical/country/cn.aspx?page=1']
Expand Down Expand Up @@ -233,7 +231,7 @@ def freeProxyEleven():
@staticmethod
def freeProxyTwelve(page_count=2):
"""
guobanjia http://ip.jiangxianli.com/?page=
http://ip.jiangxianli.com/?page=
免费代理库
超多量
:return:
Expand Down Expand Up @@ -291,7 +289,7 @@ def freeProxyWallThird():
from CheckProxy import CheckProxy

# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyFirst)
CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxySecond)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxySecond)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyThird)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyFourth)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyFifth)
Expand All @@ -300,7 +298,7 @@ def freeProxyWallThird():
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyEight)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyNinth)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyTen)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyEleven)
CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyEleven)
# CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxyTwelve)

# CheckProxy.checkAllGetProxyFunc()
11 changes: 0 additions & 11 deletions Test/testGetFreeProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,11 @@
"""
__author__ = 'J_hao'

import re
import sys
import requests

try:
from importlib import reload # py3 实际不会使用,只是为了不显示语法错误
except:
reload(sys)
sys.setdefaultencoding('utf-8')

sys.path.append('..')
from ProxyGetter.getFreeProxy import GetFreeProxy
from Config.ConfigGetter import config


# noinspection PyPep8Naming
def testGetFreeProxy():
"""
test class GetFreeProxy in ProxyGetter/GetFreeProxy
Expand Down

0 comments on commit 792fd13

Please sign in to comment.