Skip to content

Commit

Permalink
[update]多线程验证
Browse files (browse the repository at this point in the history)
  • Loading branch information
jinghao_wb committed Sep 27, 2017
1 parent c326ca1 commit e110207
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 35 deletions.
62 changes: 62 additions & 0 deletions Schedule/ProxyCheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name: ProxyCheck
Description : 多线程验证useful_proxy
Author : J_hao
date: 2017/9/26
-------------------------------------------------
Change Activity:
2017/9/26: 多线程验证useful_proxy
-------------------------------------------------
"""
__author__ = 'J_hao'

import sys
from time import sleep
from threading import Thread

sys.path.append('../')

from Util.utilFunction import validUsefulProxy
from Manager.ProxyManager import ProxyManager
from Util.LogHandler import LogHandler


class ProxyCheck(ProxyManager, Thread):
    """Worker thread that re-validates proxies taken from the useful-proxy table.

    Each instance initialises both ``ProxyManager`` and ``Thread`` and owns a
    logger named ``proxy_check``. Several instances can be started side by
    side to check proxies concurrently.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    @staticmethod
    def _counter_as_int(counter):
        """Return ``counter`` as an int; 0 when it is missing or not numeric.

        :param counter: raw counter value read from a popped proxy item
        :return: integer counter
        """
        try:
            return int(counter)
        except (TypeError, ValueError):
            # A missing/corrupt counter must not kill the worker thread.
            return 0

    def run(self):
        """Validate popped proxies forever, sleeping five minutes when idle.

        For every item returned by ``self.db.pop()``:

        * validation passed: the proxy is put back, with the counter raised
          by one while the stored counter is below 1, otherwise with the
          default ``put`` arguments;
        * validation failed: the proxy is deleted once its counter is below
          -5, otherwise it is put back with the counter lowered by one.

        :return: never returns (infinite loop)
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            while proxy_item:
                proxy = proxy_item.get('proxy')
                counter = proxy_item.get('value')
                # Parse once; previously int(None) raised TypeError on the
                # failure branch when the item carried no counter.
                count = self._counter_as_int(counter)
                if validUsefulProxy(proxy):
                    # Passed: bump the counter while it is still below 1.
                    if counter and count < 1:
                        self.db.put(proxy, num=count + 1)
                    else:
                        # NOTE(review): relies on put()'s default ``num`` --
                        # confirm it matches the intended upper bound.
                        self.db.put(proxy)
                    self.log.info('ProxyCheck: {} validation pass'.format(proxy))
                else:
                    self.log.info('ProxyCheck: {} validation fail'.format(proxy))
                    # Failed: drop the proxy after too many failures,
                    # otherwise store it with the counter decremented.
                    if counter and count < -5:
                        self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                        self.db.delete(proxy)
                    else:
                        self.db.put(proxy, num=count - 1)

                proxy_item = self.db.pop()
            # Table drained: wait five minutes before the next pass.
            sleep(60 * 5)


if __name__ == '__main__':
    # Script entry point: run a single checker in the calling thread
    # (run() is invoked directly, so no extra thread is started).
    checker = ProxyCheck()
    checker.run()
7 changes: 4 additions & 3 deletions Schedule/ProxyRefreshSchedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ def validProxy(self):
:return:
"""
self.db.changeTable(self.raw_proxy_queue)
raw_proxy = self.db.pop()
raw_proxy_item = self.db.pop()
self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
# 计算剩余代理,用来减少重复计算
remaining_proxies = self.getAll()
while raw_proxy:
while raw_proxy_item:
raw_proxy = raw_proxy_item.get('proxy')
if isinstance(raw_proxy, bytes):
# 兼容Py3
raw_proxy = raw_proxy.decode('utf8')
Expand All @@ -62,7 +63,7 @@ def validProxy(self):
else:
self.log.info('ProxyRefreshSchedule: %s validation fail' % raw_proxy)
self.db.changeTable(self.raw_proxy_queue)
raw_proxy = self.db.pop()
raw_proxy_item = self.db.pop()
remaining_proxies = self.getAll()
self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())

Expand Down
44 changes: 13 additions & 31 deletions Schedule/ProxyValidSchedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,49 +13,31 @@
__author__ = 'JHao'

import sys
from time import sleep

sys.path.append('../')

from Util.utilFunction import validUsefulProxy
from Manager.ProxyManager import ProxyManager
from Util.LogHandler import LogHandler
from Schedule.ProxyCheck import ProxyCheck


class ProxyValidSchedule(ProxyManager):
class ProxyValidSchedule(object):
def __init__(self):
ProxyManager.__init__(self)
self.log = LogHandler('valid_schedule')
pass

def __validProxy(self):
def __validProxy(self, threads=5):
"""
验证代理
验证useful_proxy代理
:param threads: 线程数
:return:
"""
while True:
self.db.changeTable(self.useful_proxy_queue)
for each_proxy in self.db.getAll():
if isinstance(each_proxy, bytes):
# 兼容PY3
each_proxy = each_proxy.decode('utf-8')
thread_list = list()
for index in range(threads):
thread_list.append(ProxyCheck())

value = self.db.get(each_proxy)
if validUsefulProxy(each_proxy):
# 成功计数器加1
if value and int(value) < 1:
self.db.update(each_proxy, 1)
self.log.info('ProxyValidSchedule: {} validation pass'.format(each_proxy))
else:
# 失败计数器减一
if value and int(value) < -5:
# 计数器小于-5删除该代理
self.db.delete(each_proxy)
else:
self.db.update(each_proxy, -1)
self.log.info('ProxyValidSchedule: {} validation fail'.format(each_proxy))
for thread in thread_list:
thread.start()

self.log.info('ProxyValidSchedule running normal')
sleep(60 * 1)
for thread in thread_list:
thread.join()

def main(self):
self.__validProxy()
Expand Down
1 change: 0 additions & 1 deletion log/test.log

This file was deleted.

0 comments on commit e110207

Please sign in to comment.