merging update, better error handling of empty files
h4ck3rm1k3 committed Aug 8, 2012
2 parents a294e17 + 5c410d4 commit b080ca5
Showing 4 changed files with 48 additions and 85 deletions.
1 change: 1 addition & 0 deletions crontab.txt
@@ -0,0 +1 @@
1,30,20,40,50,10 * * * * bash /home/h4ck3rm1k3/experiments/wikiteam/runexport.sh
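
Note: cron ignores the order of a comma-separated field, so the minute list above is equivalent to 1,10,20,30,40,50; runexport.sh is launched six times an hour, roughly every ten minutes.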
2 changes: 1 addition & 1 deletion dumpgenerator.py
@@ -801,7 +801,7 @@ def push_zip (file):

year = d.year
month= d.month
block= "wikipedia-delete-v2-%0.4d-%02d" % (year, month)
block= "wikipedia-delete-v3-%0.4d-%02d" % (year, month)
print "going to use %s" % block
conn = boto.connect_s3(host='s3.us.archive.org', is_secure=False)
bucket = conn.get_bucket(block)
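
For context, a minimal sketch of the bucket-naming scheme this hunk touches, assuming d is the current datetime and using the classic boto S3 calls already visible above (the only change in this commit is the v2 -> v3 prefix):

    import datetime
    import boto

    d = datetime.datetime.now()
    # after this commit the monthly bucket prefix is v3, e.g. wikipedia-delete-v3-2012-08
    block = "wikipedia-delete-v3-%0.4d-%02d" % (d.year, d.month)

    conn = boto.connect_s3(host='s3.us.archive.org', is_secure=False)
    bucket = conn.get_bucket(block)  # raises S3ResponseError if the month's bucket does not exist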
8 changes: 5 additions & 3 deletions pywikibot/throttle.py
@@ -37,7 +37,7 @@ def __init__(self, mindelay=None, maxdelay=None, writedelay=None,
multiplydelay=True, verbosedelay=False, write=False):
self.lock = threading.RLock()
self.mysite = None
self.ctrlfilename = config.datafilepath('pywikibot', 'throttle.ctrl')
self.ctrlfilename = config.datafilepath('pywikibot2', 'throttle2.ctrl')
self.mindelay = mindelay
if self.mindelay is None:
self.mindelay = config.minthrottle
@@ -234,7 +234,8 @@ def __call__(self, requestsize=1, write=False):
"""
self.lock.acquire()
try:
wait = self.waittime(write=write or self.write)
#wait = self.waittime(write=write or self.write)
wait = 1
# Calculate the multiplicity of the next delay based on how
# big the request is that is being posted now.
# We want to add "one delay" for each factor of two in the
@@ -271,7 +272,8 @@ def lag(self, lagtime):
# wait at least 5 seconds but not more than 120 seconds
delay = min(max(5, lagtime//2), 120)
# account for any time we waited while acquiring the lock
wait = delay - (time.time() - started)
# wait = delay - (time.time() - started)
wait = 1
if wait > 0:
if wait > config.noisysleep:
pywikibot.output(
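
Taken together, the two throttle hunks above bypass the computed delay. A minimal sketch of the resulting behaviour (names simplified, not the real Throttle class), assuming the computed wait is what would otherwise be slept:

    import time

    def throttled(do_request):
        # was: wait = self.waittime(write=...) or delay - (time.time() - started)
        wait = 1  # flat one-second pause instead of the configured/adaptive delay
        if wait > 0:
            time.sleep(wait)
        return do_request()
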
122 changes: 41 additions & 81 deletions speedydeletion.py
@@ -11,36 +11,6 @@
from shove import Shove
file_store = Shove('file://wikiaupload')

# def signpage(site,pagename) :

# generator = [pywikibot.Page(
# site,
# pagename
# )]
# # Main Loop
# for page in generator:
# print "going to process %s" % page.urlname()
# try:
# text = page.get()
# except:
# text = ""

# m = re.search("==archived on speedy deletion wikia==" , text)
# if not(m):
# m = re.search("==archived==" , text)
# if not( m):
# summary="notification of speedy deletion page"
# newname =page.urlname()
# newname = newname.replace('Talk%3A', '')
# newtext= "==archived on speedy deletion wikia==\nThis endangered article has been archived here http://speedydeletion.wikia.com/wiki/%s so that it is not lost if deleted. Changes made after the archiving will not be copied.\n~~~~" % newname
# (text, newtext, always) = add_text(page, newtext, summary, regexSkip,
# regexSkipUrl, always, up, True, reorderEnabled=reorderEnabled,
# create=talkPage)
# else:
# print "skipping %s" % page.urlname()
# else:
# print "skipping %s" % page.urlname()

def main(*args):
genFactory = pagegenerators.GeneratorFactory()
# If xmlfilename is None, references will be loaded from the live wiki.
@@ -51,16 +21,11 @@ def main(*args):
# read command line parameters
for arg in pywikibot.handleArgs(*args):
xmlfilename = arg

print xmlfilename

insite = pywikibot.getSite("en","wikipedia")

importsite = "speedydeletion"

outsite = pywikibot.getSite("en",importsite)
outsite.forceLogin()

dump = xmlreader.XmlDump(xmlfilename)
count = 0

@@ -74,60 +39,55 @@
for entry in dump.parse():
# print file_store[entry.title]
title=entry.title.encode("ascii","ignore")


m = re.search("Wikipedia:" , entry.title)
if m:
if re.search("^Wikipedia:" , entry.title):
pywikibot.output(u'skipping %s' % entry.title)
continue
if re.search("^User:" , entry.title):
pywikibot.output(u'skipping %s' % entry.title)
continue
if re.search("^User Talk:" , entry.title):
pywikibot.output(u'skipping %s' % entry.title)
continue
if re.search(".css$" , entry.title):
pywikibot.output(u'skipping %s' % entry.title)
continue
if re.search("^Main Page" , entry.title):
pywikibot.output(u'skipping %s' % entry.title)
next;
if entry.title != "Main Page" :
continue
pywikibot.output(u'Considering %s' % entry.title)
try :
if (file_store[title] ) :
count = count +1
else:
pywikibot.output(u'not exists %s' % entry.title)
except KeyError :
try :
if (file_store[title] ) :
count = count +1
# pywikibot.output(u'was cached %s' % entry.title)
outpage = pywikibot.Page(site=outsite, title=entry.title, insite=outsite)
if outpage.exists():
pywikibot.output(u'there is an article %s' % entry.title)
try:
file_store[title] = 1
except KeyError :
pywikibot.output(u'key error saving article %s' % entry.title)
else:
pywikibot.output(u'not exists %s' % entry.title)
except KeyError :
# print sys.exc_type, ":", "%s is not in the list." % sys.exc_value
# pywikibot.output(u'key error %s' % entry.title)
try :
outpage = pywikibot.Page(site=outsite, title=entry.title, insite=outsite)
if outpage.exists():
pywikibot.output(u'there is an article %s' % entry.title)
try:
file_store[title] = 1
except KeyError :
pywikibot.output(u'key error saving article %s' % entry.title)

pywikibot.output(u'is not there, adding %s' % entry.title)
contents = entry.text
usernames = entry.username
if re.search('Template:', title):
contents = contents + "<noinclude>{{wikipedia-template|%s}}</noinclude>" % usernames
else:
pywikibot.output(u'is not there, adding %s' % entry.title)
contents = entry.text
usernames = entry.username
if re.search('Template:', title):
contents = contents + "<noinclude>{{wikipedia-template|%s}}</noinclude>" % usernames
else:
contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
# contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames

outpage._site=outsite
outpage.put(contents)

# signpage(insite,"Talk:%s" % pagename)

try :
file_store[title] = 1
except:
pywikibot.output(u'could not save %s! to the list of article' % entry.title)
contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
outpage._site=outsite
outpage.put(contents)
try :
file_store[title] = 1
except:
pywikibot.output(u'could not process %s! ' % entry.title)
finally:
count = count + 1
except:
pywikibot.output(u'could not process %s! ' % entry.title)
pywikibot.output(u'could not save %s! to the list of article' % entry.title)
finally:
count = count + 1
#print "done with %s %d" % (entry.title, count)

finally:
count = count + 1

if __name__ == "__main__":
try:
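
For readers skimming the diff, a minimal sketch of the flow the rewritten loop implements (simplified; the helper names archive_entry and SKIP_PATTERNS are illustrative, not from the file): skip maintenance titles, consult the Shove cache, and upload only pages that are not already on the speedydeletion wiki, tagging them with the appropriate template.

    import re
    import pywikibot
    from shove import Shove

    file_store = Shove('file://wikiaupload')
    SKIP_PATTERNS = (r"^Wikipedia:", r"^User:", r"^User Talk:", r"\.css$", r"^Main Page")

    def archive_entry(entry, outsite):
        """Copy one XML dump entry to the speedydeletion wiki unless cached or skipped."""
        title = entry.title.encode("ascii", "ignore")
        if any(re.search(p, entry.title) for p in SKIP_PATTERNS):
            return
        try:
            if file_store[title]:
                return  # already archived in an earlier run
        except KeyError:
            pass
        outpage = pywikibot.Page(site=outsite, title=entry.title, insite=outsite)
        if not outpage.exists():
            contents = entry.text
            if re.search('Template:', title):
                contents += "<noinclude>{{wikipedia-template|%s}}</noinclude>" % entry.username
            else:
                contents += "\n{{wikipedia-deleted|%s}}" % entry.username
            outpage.put(contents)
        file_store[title] = 1  # remember the title either way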
