User:GrabberBot/cleaner.py
From The Battle for Wesnoth Wiki
#!/usr/bin/python
import urllib, urllib2, sys, re, os
import mediawiki
# Web service that returns the pending commands, one per line.
url3 = "http://allefant.sourceforge.net/wesnoth/graphiclibrary.py?acquire=1"


def parse_args(argv):
    """Translate a command line into the script's option values.

    argv is the full argument vector; argv[0] (the program name) is never
    interpreted as an option.  Recognised options:

        -i         set IMG_TAGS
        -n         set NO_UPDATE
        -c FILE    store FILE in COMMANDS_FILE
        -w FILE    store FILE in WIKI_FILE
        -v         set VERBOSE
        -o         set ONE_A_TIME
        -a         set MOVE_ALL

    Unknown arguments are ignored.  The argument following -c / -w is taken
    verbatim as the file name, even if it looks like an option.

    Returns a dict keyed by the option names above.  Terminates the script
    with an error message (SystemExit) when -c or -w is the last argument
    and therefore has no file name.
    """
    options = {
        "IMG_TAGS": False,
        "NO_UPDATE": False,
        "COMMANDS_FILE": "",
        "WIKI_FILE": "",
        "VERBOSE": False,
        "ONE_A_TIME": False,
        "MOVE_ALL": False,
    }
    switches = {
        "-i": "IMG_TAGS",
        "-n": "NO_UPDATE",
        "-v": "VERBOSE",
        "-o": "ONE_A_TIME",
        "-a": "MOVE_ALL",
    }
    takes_value = {
        "-c": "COMMANDS_FILE",
        "-w": "WIKI_FILE",
    }

    pending_flag = None  # option still waiting for its file name
    for arg in argv[1:]:
        if pending_flag is not None:
            options[takes_value[pending_flag]] = arg
            pending_flag = None
        elif arg in switches:
            options[switches[arg]] = True
        elif arg in takes_value:
            pending_flag = arg

    if pending_flag is not None:
        # Previously this case died with an IndexError on sys.argv[i + 1].
        sys.exit("option %s requires a file name" % pending_flag)
    return options


_options = parse_args(sys.argv)
IMG_TAGS = _options["IMG_TAGS"]            # -i
NO_UPDATE = _options["NO_UPDATE"]          # -n
COMMANDS_FILE = _options["COMMANDS_FILE"]  # -c FILE
WIKI_FILE = _options["WIKI_FILE"]          # -w FILE
VERBOSE = _options["VERBOSE"]              # -v
ONE_A_TIME = _options["ONE_A_TIME"]        # -o
MOVE_ALL = _options["MOVE_ALL"]            # -a
# Read the pending commands, either from a local file (-c) or from the web
# service at url3.  The source is closed explicitly instead of being left
# to the garbage collector.
if COMMANDS_FILE:
    command_source = open(COMMANDS_FILE)
else:
    # read commands
    command_source = urllib2.urlopen(url3)
try:
    commands = command_source.read().splitlines()
finally:
    command_source.close()

if not commands:
    # nothing to process
    sys.exit(0)

commands.reverse()  # that way, last (most recent) command has priority

mw = mediawiki.MediaWiki()

if WIKI_FILE:
    wiki_source = open(WIKI_FILE)
    try:
        data = wiki_source.read()
    finally:
        wiki_source.close()
else:
    # read current contents of wiki page
    data = mw.fetch("UnsortedContrib")

# Shared state filled in by repl():
stats = [0, 0]  # [number of deleted entries, number of moved entries]
moves = {}      # artist name -> list of image URLs to move to that artist
modified = 0    # set to 1 as soon as any entry is deleted or moved
def get_name(str):
    """Reduce an artist name to a wiki-safe page-name stem.

    Every character that is not a plain ASCII letter (a-z, A-Z) is dropped
    and the first remaining letter is upper-cased, e.g. "john doe" becomes
    "Johndoe".  The parameter keeps its historical name ``str`` (shadowing
    the builtin) so existing callers are unaffected.

    Raises:
        ValueError: if the name contains no ASCII letter at all, since no
            page name can be derived from it.
    """
    # make sure the name is not jinxed or it will redirect to the homepage and
    # deface the wiki :P
    letters = [c for c in str if "a" <= c <= "z" or "A" <= c <= "Z"]
    if not letters:
        # Previously this surfaced as an IndexError on name[0].
        raise ValueError("no usable letters in artist name %r" % (str,))
    name = "".join(letters)
    return name[0].upper() + name[1:]
def repl(mob):
    """Decide the fate of one matched wiki entry (regex substitution callback).

    mob is the match object for one entry; its full text is inspected for
    the artist name (the label of the ".../wiki/...Contrib <name>]" link) and
    the image URL (from an <img> tag when IMG_TAGS is set, otherwise from a
    "| http://... |" cell).

    The module-level ``commands`` list is then scanned in order:

    * "delete <image>" drops the entry (returns "") and bumps stats[0].
    * "move <name> <image>", or any command when MOVE_ALL is set, records
      the image under moves[name], bumps stats[1], and drops the entry
      unless NO_UPDATE is set.

    Returns "" to remove the entry from the page or the original text to
    keep it.  Entries that do not match the expected name/image format are
    returned unchanged instead of raising AttributeError.
    """
    global modified

    item = mob.group(0)
    name_match = re.search(
        """\\* \\[http://wesnoth.org/wiki/.*?Contrib (.*?)\\]""", item)
    if IMG_TAGS:
        image_match = re.search(
            """^<img src="(http://.*)" />$""", item, re.M)
    else:
        image_match = re.search("\\| (http://.*) \\|", item)

    if name_match is None or image_match is None:
        # Not an image entry in the format we understand: leave it alone.
        return item

    name = name_match.group(1)
    image = image_match.group(1)
    if VERBOSE:
        print("%s %s" % (name, image))

    for com in commands:
        # Commands are compared in URL-escaped form.
        com = com.replace(" ", "%20")
        if com == "delete%20" + image:
            modified = 1
            stats[0] += 1
            if VERBOSE:
                print(" delete")
            return ""
        # NOTE(review): with MOVE_ALL the first command that is not a delete
        # for this image already triggers the move, so a matching delete
        # further down the list is never reached.
        if MOVE_ALL or com == "move%20" + name + "%20" + image:
            modified = 1
            stats[1] += 1
            moves.setdefault(name, []).append(image)
            if VERBOSE:
                print(" move")
            if not NO_UPDATE:
                return ""
            break

    # return unmodified
    return item
# Pass every "* [http://..." line of the page through repl(); the callback
# answers with the text that should replace that line.
entry_pattern = re.compile("\\* \\[http://.*?\n", re.S)
data = entry_pattern.sub(repl, data)
def check_already(name, link,
                  art_dir="/home/elias/prog/python/wesnoth/glib/art"):
    """Tell whether the picture behind *link* is already present on disk.

    The file name is the last path component of *link* ending in .png,
    .gif, .bmp or .jpg; it is looked up as <art_dir>/<name>/<file name>.

    Args:
        name: sub-directory of art_dir to look in.
        link: URL of the picture.
        art_dir: root directory of the local picture collection; defaults
            to the location the script has always used.

    Returns:
        True if the file can be opened for reading, False if it cannot or
        if *link* has no recognisable picture file name (previously that
        case raised AttributeError).
    """
    mob = re.compile(".*/(.+\\.(?:png|gif|bmp|jpg))").search(link)
    if mob is None:
        return False
    path = os.path.join(art_dir, name, mob.group(1))
    try:
        handle = open(path)
    except IOError:
        return False
    # Only existence/readability matters; do not leak the handle.
    handle.close()
    return True
# Add the collected images to each artist's "<Name>Contrib" wiki page.
for move in moves:
    anything = 0  # number of images actually added for this artist
    name = get_name(move)
    if VERBOSE:
        print(name)

    contrib = mw.fetch(name + "Contrib")
    if contrib.strip() == "":  # doesn't exist yet
        contrib = "== %s ==\n\n\n== See Also ==\n\n[[GraphicLibrary]]" % move
        # Record the artist whose page had to be created.  The log is
        # closed right away so the line is flushed and the handle released.
        miss = open("missing_artists.txt", "a")
        try:
            miss.write(name + "\n")
        finally:
            miss.close()
        if VERBOSE:
            print(" creating new page")

    # Sanity check: the first line of the page should mention the artist
    # (full name or at least its first word); otherwise refuse to touch it.
    firstline = contrib.split("\n", 1)[0].lower()
    if move.lower() not in firstline and \
            move.split(" ")[0].lower() not in firstline:
        print("Right page? %s not in \"%s\"" % (move, firstline))
        contrib = ""
    if VERBOSE:
        print(" page %s for %s" % ((name + "Contrib"), move))

    # New image links are inserted directly after the first "== ... =="
    # heading and the blank line that follows it.
    mob = re.compile("\\s*==.*?== *\n\n").search(contrib)
    if mob:
        newcontrib = contrib[:mob.end(0)]
        for pic in moves[move]:
            if not check_already(name, pic):
                newcontrib += pic + "\n"
                anything += 1
                if VERBOSE:
                    print(" add: %s" % pic)
            else:
                if VERBOSE:
                    print(" already: %s" % pic)
        newcontrib += contrib[mob.end(0):]
        contrib = newcontrib
    else:
        print("Could not add %s for %s." % (moves[move], name))

    if anything and not NO_UPDATE:
        # Update contributor page
        mw.post(name + "Contrib", contrib, "added %d images" % anything)
    if ONE_A_TIME and anything:
        # NOTE(review): exiting here also skips everything that follows this
        # loop in the script - confirm that is the intended -o behaviour.
        sys.exit(0)
# Store the cleaned-up page text: into the local file when one was given
# with -w, otherwise back to the wiki (unless updates are disabled).
if modified and WIKI_FILE:
    print("Writing data")
    wiki_out = open(WIKI_FILE, "w")
    try:
        wiki_out.write(data)
    finally:
        # Close explicitly so the text is flushed to disk deterministically.
        wiki_out.close()

if modified and not NO_UPDATE and not WIKI_FILE:
    # Update UnsortedContrib page
    mw.post("UnsortedContrib", data, "cleared %d and sorted %d images" %
            (stats[0], stats[1]))