Skip to content

Commit

Permalink
[update] 代码规范
Browse files: browse the repository at this point in the history
  • Loading branch information
jhao104 committed Apr 26, 2017
1 parent 36d46dc commit 53570a4
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 18 deletions.
11 changes: 7 additions & 4 deletions Api/ProxyApi.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

app = Flask(__name__)


api_list = {
'get': u'get an usable proxy',
'refresh': u'refresh proxy pool',
Expand All @@ -44,14 +45,16 @@ def get():

@app.route('/refresh/')
def refresh():
ProxyManager().refresh()
# TODO refresh会有守护程序定时执行,由api直接调用性能较差,暂不使用
# ProxyManager().refresh()
pass
return 'success'


@app.route('/get_all/')
def getAll():
proxys = ProxyManager().getAll()
return jsonify(list(proxys))
proxies = ProxyManager().getAll()
return jsonify(list(proxies))


@app.route('/delete/', methods=['GET'])
Expand All @@ -71,4 +74,4 @@ def run():
app.run(host='0.0.0.0', port=5000)

if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000)
run()
8 changes: 4 additions & 4 deletions Manager/ProxyManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def refresh(self):
for proxyGetter in self.config.proxy_getter_functions:
proxy_set = set()
# fetch raw proxy
for proxy in getattr(GetFreeProxy, proxyGetter.strip())(): #get GetFreeProxy.freeProxyFirst and it is a iteration
for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
if proxy.strip():
self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
proxy_set.add(proxy.strip())
Expand Down Expand Up @@ -77,10 +77,10 @@ def getAll(self):

def get_status(self):
self.db.changeTable(self.raw_proxy_queue)
quan_raw_proxy = self.db.get_status()
total_raw_proxy = self.db.get_status()
self.db.changeTable(self.useful_proxy_queue)
quan_useful_queue = self.db.get_status()
return {'raw_proxy': quan_raw_proxy, 'useful_proxy': quan_useful_queue}
total_useful_queue = self.db.get_status()
return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}

if __name__ == '__main__':
pp = ProxyManager()
Expand Down
4 changes: 3 additions & 1 deletion Schedule/ProxyRefreshSchedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Change Activity:
2016/12/4: 代理定时刷新
2017/03/06: 使用LogHandler添加日志
2017/04/26: raw_proxy_queue验证通过但useful_proxy_queue中已经存在的代理不再放入
-------------------------------------------------
"""

Expand Down Expand Up @@ -44,8 +45,9 @@ def validProxy(self):
self.db.changeTable(self.raw_proxy_queue)
raw_proxy = self.db.pop()
self.log.info('%s start validProxy_a' % time.ctime())
exist_proxy = self.db.getAll()
while raw_proxy:
if validUsefulProxy(raw_proxy):
if validUsefulProxy(raw_proxy) and (raw_proxy not in exist_proxy):
self.db.changeTable(self.useful_proxy_queue)
self.db.put(raw_proxy)
self.log.info('validProxy_a: %s validation pass' % raw_proxy)
Expand Down
3 changes: 1 addition & 2 deletions Schedule/ProxyValidSchedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@ def __validProxy(self):
while True:
self.db.changeTable(self.useful_proxy_queue)
for each_proxy in self.db.getAll():
if isinstance(each_proxy,bytes):
if isinstance(each_proxy, bytes):
each_proxy = each_proxy.decode('utf-8')
self.log.info(u'数据库返回数据类型为bytes')

if validUsefulProxy(each_proxy):
self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
Expand Down
5 changes: 2 additions & 3 deletions Util/utilClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ def __get__(self, instance, owner):


try:
from configparser import ConfigParser # py3
from configparser import ConfigParser # py3
except:
from configparser import ConfigParser # py2
from ConfigParser import ConfigParser # py2


class ConfigParse(ConfigParser):
Expand All @@ -47,7 +47,6 @@ class ConfigParse(ConfigParser):
def __init__(self):
ConfigParser.__init__(self)


def optionxform(self, optionstr):
return optionstr

Expand Down
9 changes: 5 additions & 4 deletions Util/utilFunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
logger = LogHandler(__name__)


def getHTMLText(url, headers = {'user':'Mozilla/5.0'}):
def getHTMLText(url, headers={'user': 'Mozilla/5.0'}):
try:
response = requests.get(url, headers = headers, timeout=10)
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
response.encoding = response.apparent_encoding
return response.text
except:
return response.status_code


# noinspection PyPep8Naming
def robustCrawl(func):
def decorate(*args, **kwargs):
Expand Down Expand Up @@ -70,7 +71,7 @@ def getHtmlTree(url, **kwargs):
'Accept-Encoding': 'gzip, deflate, sdch',
'Accept-Language': 'zh-CN,zh;q=0.8',
}
#取代理服务器用代理服务器访问
# TODO 取代理服务器用代理服务器访问
html = requests.get(url=url, headers=header, timeout=30).content
return etree.HTML(html)

Expand All @@ -83,7 +84,7 @@ def validUsefulProxy(proxy):
"""
proxies = {"https": "https://{proxy}".format(proxy=proxy)}
try:
# 超过30秒的代理就不要了
# 超过20秒的代理就不要了
r = requests.get('https://www.baidu.com/', proxies=proxies, timeout=20, verify=False)
if r.status_code == 200:
logger.debug('%s is ok' % proxy)
Expand Down

0 comments on commit 53570a4

Please sign in to comment.