Skip to content

Commit

Permalink
utf handling
Browse files Browse the repository at this point in the history
  • Loading branch information
James Michael DuPont authored and James Michael DuPont committed Aug 3, 2013
1 parent ee404b7 commit 59668c0
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
3 changes: 2 additions & 1 deletion process.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
fn="enwikipediaorg_w-%s-wikidump/enwikipediaorg_w-%s-history.xml" % (ts, ts)
pn="enwikipediaorg_w-%s-wikidump*" % (ts)
zn="wtarchive%s*" % (ts)
cmd = "python ../speedydeletion.py --validate ./%s" % fn
# cmd = "python ../speedydeletion.py --validate ./%s" % fn
cmd = "python ../speedydeletion.py ./%s" % fn
print cmd
stat=os.system(cmd)
print stat
Expand Down
22 changes: 21 additions & 1 deletion speedydeletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,26 @@ def signal_handler(signal, frame):
sys.exit(0)
signal.signal(signal.SIGINT, signal_handler)

import unicodedata

def decode(link):
    """Return *link* reduced to an ASCII-only byte string.

    The value is decoded from UTF-8 (unless it is already a unicode
    object), normalized to NFKD and then handed to ``strip()``, which
    drops every character that cannot be encoded as ASCII.

    link -- a UTF-8 encoded byte string or a unicode object.
    """
    # unicode(obj, 'utf-8') raises TypeError when obj is already unicode,
    # so only decode values that still need decoding.
    if not isinstance(link, unicode):
        link = unicode(link, 'utf-8')
    # Normalize before stripping so that whatever NFKD decomposes into
    # ASCII-encodable characters survives the ASCII-only encode in strip().
    link = unicodedata.normalize('NFKD', link)
    return strip(link)

def decodeuc(link):
    """Return *link* reduced to an ASCII-only byte string.

    Unlike ``decode()``, the value is converted with ``unicode()`` without
    an explicit encoding, then NFKD-normalized and passed to ``strip()``.
    Presumably meant for input that is already unicode -- confirm against
    the callers.
    """
    normalized = unicodedata.normalize('NFKD', unicode(link))
    return strip(normalized)


def strip(link):
    """Return *link* as a byte string containing only ASCII characters.

    Characters (or bytes) outside the ASCII range are silently dropped.

    link -- a unicode object or a byte string.
    """
    if isinstance(link, bytes):
        # Calling .encode() on a byte string makes Python 2 decode it as
        # ASCII first, which raises on any non-ASCII byte.  Decode
        # explicitly and ignore those bytes instead.
        link = link.decode('ascii', 'ignore')
    return link.encode('ascii', 'ignore')


import subprocess

Expand Down Expand Up @@ -83,7 +103,7 @@ def main(*args):
title = title.replace("!","_")
title = title.replace("/","_")
title = title.replace("\\","_")

title = decode(title)
try :
if (len(title) < 1):
pywikibot.output(u'empty title:%s' % entry.title)
Expand Down

0 comments on commit 59668c0

Please sign in to comment.