Skip to content

Commit

Permalink
utf8 files not ascii, better handling of utf8 names
Browse files Browse the repository at this point in the history
  • Loading branch information
h4ck3rm1k3 committed Aug 8, 2012
1 parent c6cae26 commit 0b3630a
Showing 1 changed file with 49 additions and 5 deletions.
54 changes: 49 additions & 5 deletions speedydeletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
from shove import Shove
file_store = Shove('file://wikiaupload')

import signal
import sys
def signal_handler(signal, frame):
    """Handle an interrupt signal: print a notice and exit with status 0.

    The module registers this function for SIGINT via ``signal.signal``.

    Args:
        signal: number of the delivered signal (unused).  NOTE: inside this
            function the name shadows the imported ``signal`` module; it is
            kept unchanged so the handler's signature stays the same.
        frame: stack frame that was active when the signal arrived (unused).

    Raises:
        SystemExit: always, with exit code 0.
    """
    # A single-argument, parenthesised print behaves the same whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print('You pressed Ctrl+C!')
    sys.exit(0)
# Register signal_handler for SIGINT so that pressing Ctrl+C prints a
# message and exits through sys.exit(0).
signal.signal(signal.SIGINT, signal_handler)



def main(*args):
genFactory = pagegenerators.GeneratorFactory()
# If xmlfilename is None, references will be loaded from the live wiki.
Expand Down Expand Up @@ -38,7 +47,7 @@ def main(*args):

for entry in dump.parse():
# print file_store[entry.title]
title=entry.title.encode("ascii","ignore")
title=entry.title.encode("utf8","ignore")

if re.search("^Wikipedia:" , entry.title):
pywikibot.output(u'skipping %s' % entry.title)
Expand All @@ -55,21 +64,34 @@ def main(*args):
if re.search("^Main Page" , entry.title):
pywikibot.output(u'skipping %s' % entry.title)
continue
pywikibot.output(u'Considering %s' % entry.title)
# pywikibot.output(u'Considering %s' % entry.title)

title = title.replace(":","_")
title = title.replace("!","_")
title = title.replace("/","_")
title = title.replace("\\","_")

try :
if (len(title) < 1):
pywikibot.output(u'empty title:%s' % entry.title)
continue;

if (file_store[title] ) :
count = count +1
else:
pywikibot.output(u'not exists %s' % entry.title)
except KeyError :
try :
outpage = pywikibot.Page(site=outsite, title=entry.title, insite=outsite)


if outpage.exists():
pywikibot.output(u'there is an article %s' % entry.title)
#pywikibot.output(u'there is an article %s' % entry.title)
try:
file_store[title] = 1
except KeyError :
pywikibot.output(u'key error saving article %s' % entry.title)
pywikibot.output(u'key error saving article %s transformed to %s' % (entry.title , title) )

else:
pywikibot.output(u'is not there, adding %s' % entry.title)
contents = entry.text
Expand All @@ -82,10 +104,29 @@ def main(*args):
outpage.put(contents)
try :
file_store[title] = 1
except:
except KeyboardInterrupt:
print "Bye"
sys.exit()

except KeyError:
pywikibot.output(u'could not save %s! to the list of article' % entry.title)


except KeyboardInterrupt:
print "Bye"
sys.exit()
except KeyError:
pywikibot.output(u'problem with %s! ' % entry.title)

finally:
count = count + 1

except KeyboardInterrupt:
print "Bye"
sys.exit()
except KeyError:
pywikibot.output(u'problem2 with %s! ' % entry.title)

finally:
count = count + 1

Expand All @@ -94,3 +135,6 @@ def main(*args):
main()
finally:
pywikibot.stopme()

## See also: how to capture SIGINT (Ctrl+C) in Python
# http://stackoverflow.com/questions/1112343/how-do-i-capture-sigint-in-python

0 comments on commit 0b3630a

Please sign in to comment.