noise reduction

James Michael DuPont authored and committed Jun 30, 2012
1 parent 0933aca commit a303130
Showing 3 changed files with 20 additions and 20 deletions.
22 changes: 11 additions & 11 deletions dumpgenerator.py
@@ -46,12 +46,12 @@
 def saveName(title):
     # an = title.encode("ascii","ignore")
     name = urllib.unquote(title)
-    print "storing %s" % name
+    # print "storing %s" % name
     file_store[name] = title
 
 def isNewTitle(name):
     name = urllib.unquote(name)
-    print "checking %s" % name
+    # print "checking %s" % name
 
     try :
         if (file_store[name] ) :
@@ -128,7 +128,7 @@ def getNamespaces(config={}):
 
     #retrieve all titles from Special:Allpages, if the wiki is big, perhaps there are sub-Allpages to explore
     namespaces = [i for i in set(namespaces)] #uniques
-    print '%d namespaces found' % (len(namespaces))
+    # print '%d namespaces found' % (len(namespaces))
     return namespaces, namespacenames
 
 
@@ -142,9 +142,9 @@ def getSDTitles(site):
               'Candidates_for_speedy_deletion_for_unspecified_reason') :
         cat = catlib.Category(site, x)
         pages = cat.articlesList(False)
-        print pages
+        # print pages
         for x in pages :
-            print x.urlname()
+            # print x.urlname()
             n = x.urlname()
             an = n.encode("ascii","ignore")
             if (isNewTitle(an)):
@@ -159,7 +159,7 @@ def getAfd(site):
               ) :
         cat = catlib.Category(site, x)
         pages = cat.articlesList(False)
-        print pages
+        # print pages
         for x in pages :
             n = x.urlname()
             an = n.encode("ascii","ignore")
@@ -231,7 +231,7 @@ def getPageTitlesScrapper(config={}):
             if not i.group('title') in titles:
                 titles.append(undoHTMLEntities(text=i.group('title')))
                 c += 1
-        print ' %d titles retrieved in the namespace %d' % (c, namespace)
+        # print ' %d titles retrieved in the namespace %d' % (c, namespace)
     return titles
 
 def getPageTitles(config={}):
@@ -346,7 +346,7 @@ def getXMLPage(config={}, title='', verbose=True):
     title_ = title
     title_ = re.sub(' ', '_', title_)
     title_ = urllib.unquote(title_)
-    print "after check %s" % title_
+    # print "after check %s" % title_
 
     # title_ = re.sub('%3A', ':', title_)
 
@@ -417,7 +417,7 @@ def parseAfd(xml):
     m = re.search('===\[\[(.+)\]\]===', xml)
     if (m):
         name=m.group(1)
-        print "found new page %s" % name
+        print "found new afd page %s" % name
         return name
     return 0
 
@@ -469,7 +469,7 @@ def generateXMLDump(config={}, titles=[], start=''):
     titles2 = []
     for title in titles:
         if not(isNewTitle(title)):
-            print 'seen %s ' % title
+            # print 'seen %s ' % title
             continue
 
         if not title.strip():
@@ -501,7 +501,7 @@ def generateXMLDump(config={}, titles=[], start=''):
     # now we add in the articles for deletion
     for title in titles2:
         if not(isNewTitle(title)):
-            print 'seen %s ' % title
+            # print 'seen %s ' % title
             continue
 
         if not title.strip():
1 change: 0 additions & 1 deletion runexport.sh
@@ -6,5 +6,4 @@ echo going to run process
 cd ${HOME}/experiments/wikiteam/data
 #pwd
 python $HOME/experiments/wikiteam/process.py
-
 rm -rf ${HOME}/experiments/wikiteam/data/wt*
17 changes: 9 additions & 8 deletions speedydeletion.py
@@ -80,23 +80,24 @@ def main(*args):
                 else:
                     pywikibot.output(u'not exists %s' % entry.title)
             except KeyError :
-                print sys.exc_type, ":", "%s is not in the list." % sys.exc_value
-                pywikibot.output(u'key error %s' % entry.title)
+                # print sys.exc_type, ":", "%s is not in the list." % sys.exc_value
+                # pywikibot.output(u'key error %s' % entry.title)
                 try :
                     outpage = pywikibot.Page(site=outsite, title=entry.title, insite=outsite)
                     if outpage.exists():
                         pywikibot.output(u'there is an article %s' % entry.title)
                         file_store[title] = 1
                     else:
-                        pywikibot.output(u'is not there %s' % entry.title)
+                        pywikibot.output(u'is not there, adding %s' % entry.title)
                         contents = entry.text
                         usernames = entry.username
-                        contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
-                        print "going to put outpage"
+                        if re.search('Template:', title):
+                            contents = contents + "\n<noinclude>{{wikipedia-template|%s}}</noinclude>" % usernames
+                        else:
+                            contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
+                        # contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
+
                         outpage._site=outsite
-                        print outpage.site
-                        print outpage.site.family.name
-                        print outpage.site.lang
                         outpage.put(contents)
 
 # signpage(insite,"Talk:%s" % pagename)
