User:GrabberBot/cleaner.py
From The Battle for Wesnoth Wiki
#!/usr/bin/python
"""Apply queued delete/move commands to the UnsortedContrib wiki page.

This is a Python 2 script (it relies on urllib2).

Commands are read one per line, either from the URL in url3 or from a local
file (-c).  Two command forms are recognised:

    delete <image-url>
    move <artist name> <image-url>

Each list entry of the UnsortedContrib page is compared against the
commands.  Deleted entries are dropped from the page.  Moved entries are
dropped from the page and their image URL is inserted below the first
heading of the artist's "<Name>Contrib" page.

Options:
    -i       take the image URL from an <img src="..." /> line instead of a
             "| url |" cell
    -n       do not post anything to the wiki; moved entries stay on the page
    -c FILE  read the commands from FILE instead of fetching url3
    -w FILE  read the page text from FILE instead of fetching it, and write
             the result back to FILE instead of posting UnsortedContrib
    -v       print progress information
    -o       exit after the first contributor page that received images
    -a       move entries even when no move command names them
"""
import os
import re
import sys
import urllib
import urllib2

import mediawiki

url3 = "http://allefant.sourceforge.net/wesnoth/graphiclibrary.py?acquire=1"

# Local art directory consulted by check_already(): <ART_DIR>/<artist>/<file>.
ART_DIR = "/home/elias/prog/python/wesnoth/glib/art"

# One list entry of the UnsortedContrib page: from "* [http://" up to and
# including the next newline.
ENTRY_RE = re.compile(r"\* \[http://.*?\n", re.S)
# Artist name as written after the link to the artist's Contrib page.
NAME_RE = re.compile(r"\* \[http://wesnoth.org/wiki/.*?Contrib (.*?)\]")
# Image URL when the entry carries an <img> tag (-i).
# NOTE(review): ENTRY_RE yields a single line starting with "* [", while this
# pattern is anchored to the start of a line, so it looks unable to match an
# entry as extracted -- confirm the intended page format for -i.
IMG_TAG_RE = re.compile(r'^<img src="(http://.*)" />$', re.M)
# Image URL when the entry uses the "| url |" form (default).
PLAIN_IMAGE_RE = re.compile(r"\| (http://.*) \|")
# File name component of an image link with a known image extension.
PICTURE_RE = re.compile(r".*/(.+\.(?:png|gif|bmp|jpg))")
# First "== heading ==" of a contributor page plus the blank line after it.
HEADING_RE = re.compile(r"\s*==.*?== *\n\n")

IMG_TAGS = False
NO_UPDATE = False
COMMANDS_FILE = ""
WIKI_FILE = ""
VERBOSE = False
ONE_A_TIME = False
MOVE_ALL = False


def _option_value(args, flag):
    """Return the argument following *flag*; exit with an error if missing."""
    value = next(args, None)
    if value is None:
        sys.exit("option %s requires a file name" % flag)
    return value


_args = iter(sys.argv[1:])
for _arg in _args:
    if _arg == "-i":
        IMG_TAGS = True
    elif _arg == "-n":
        NO_UPDATE = True
    elif _arg == "-c":
        COMMANDS_FILE = _option_value(_args, _arg)
    elif _arg == "-w":
        WIKI_FILE = _option_value(_args, _arg)
    elif _arg == "-v":
        VERBOSE = True
    elif _arg == "-o":
        ONE_A_TIME = True
    elif _arg == "-a":
        MOVE_ALL = True

if COMMANDS_FILE:
    with open(COMMANDS_FILE) as _command_source:
        commands = _command_source.read().splitlines()
else:
    # read commands
    _response = urllib2.urlopen(url3)
    try:
        commands = _response.read().splitlines()
    finally:
        _response.close()

if not commands:
    sys.exit(0)

commands.reverse()  # that way, last (most recent) command has priority
# Commands are compared in "%20"-separated form; normalise them once here
# rather than for every entry of the page.
commands = [com.replace(" ", "%20") for com in commands]

mw = mediawiki.MediaWiki()

if WIKI_FILE:
    with open(WIKI_FILE) as _wiki_source:
        data = _wiki_source.read()
else:
    # read current contents of wiki page
    data = mw.fetch("UnsortedContrib")

stats = [0, 0]  # [entries deleted, entries moved]
moves = {}      # artist name as written on the page -> list of image URLs
modified = 0


def get_name(str):
    """Return *str* reduced to its ASCII letters, first letter upper-cased.

    The result is used to build the "<Name>Contrib" page title.

    Raises ValueError when *str* contains no ASCII letter at all.
    """
    # make sure the name is not jinxed or it will redirect to the homepage and
    # deface the wiki :P
    name = "".join([c for c in str if "a" <= c.lower() <= "z"])
    if not name:
        # An empty name would address the bare "Contrib" page, so abort here
        # with a clear message instead of an IndexError on name[0].
        raise ValueError("no usable letters in artist name %r" % (str,))
    return name[0].upper() + name[1:]


def repl(mob):
    """Return the replacement text for one matched UnsortedContrib entry.

    mob is the ENTRY_RE match object.  The entry is returned unchanged when
    no command applies to it, or when its artist name or image URL cannot be
    extracted.  An empty string is returned when the entry is deleted, or
    moved while updates are enabled.  Moves are recorded in ``moves`` and
    counted in ``stats``; ``modified`` is set whenever a command applied.
    """
    global modified
    item = mob.group(0)
    name_match = NAME_RE.search(item)
    if IMG_TAGS:
        image_match = IMG_TAG_RE.search(item)
    else:
        image_match = PLAIN_IMAGE_RE.search(item)
    if name_match is None or image_match is None:
        # Leave entries we cannot parse on the page rather than aborting
        # the whole run with an AttributeError.
        if VERBOSE:
            print(" skipping unrecognised entry: %s" % item.strip())
        return item
    name = name_match.group(1)
    image = image_match.group(1)
    if VERBOSE:
        print("%s %s" % (name, image))

    delete_command = "delete%20" + image
    move_command = "move%20" + name + "%20" + image
    for com in commands:
        if com == delete_command:
            modified = 1
            stats[0] += 1
            if VERBOSE:
                print(" delete")
            return ""
        # NOTE(review): with -a this branch is taken on the first command
        # examined, so a delete command further down the list is never
        # reached -- confirm that is the intended meaning of -a.
        if MOVE_ALL or com == move_command:
            modified = 1
            stats[1] += 1
            moves.setdefault(name, []).append(image)
            if VERBOSE:
                print(" move")
            if NO_UPDATE:
                # The move is recorded, but the entry stays on the page.
                return item
            return ""
    # return unmodified
    return item


data = ENTRY_RE.sub(repl, data)


def check_already(name, link):
    """Return True if the picture named by *link* exists in the local art dir.

    name is the artist directory below ART_DIR.  Links that do not end in a
    png/gif/bmp/jpg file name are reported as not present.
    """
    mob = PICTURE_RE.search(link)
    if mob is None:
        return False
    return os.path.isfile(os.path.join(ART_DIR, name, mob.group(1)))


for move in moves:
    name = get_name(move)
    page = name + "Contrib"
    if VERBOSE:
        print(name)
    contrib = mw.fetch(page)
    if contrib.strip() == "":
        # doesn't exist yet
        contrib = "== %s ==\n\n\n== See Also ==\n\n[[GraphicLibrary]]" % move
        with open("missing_artists.txt", "a") as miss:
            miss.write(name + "\n")
        if VERBOSE:
            print(" creating new page")

    # Sanity check: the first line of the page should mention the artist,
    # either by full name or by the first word of the name.
    firstline = contrib.split("\n", 1)[0].lower()
    if (move.lower() not in firstline
            and move.split(" ")[0].lower() not in firstline):
        print("Right page? %s not in \"%s\"" % (move, firstline))
        # NOTE(review): unless -n is given, repl() has already removed these
        # images from the UnsortedContrib text, so they are not added
        # anywhere when this check fails -- confirm this is acceptable.
        contrib = ""
    if VERBOSE:
        print(" page %s for %s" % (page, move))

    anything = 0
    mob = HEADING_RE.search(contrib)
    if mob:
        # Insert the new image links directly below the first heading.
        new_lines = []
        for pic in moves[move]:
            if check_already(name, pic):
                if VERBOSE:
                    print(" already: %s" % pic)
            else:
                new_lines.append(pic + "\n")
                if VERBOSE:
                    print(" add: %s" % pic)
        anything = len(new_lines)
        split_at = mob.end(0)
        contrib = contrib[:split_at] + "".join(new_lines) + contrib[split_at:]
    else:
        print("Could not add %s for %s." % (moves[move], name))

    if anything and not NO_UPDATE:
        # Update contributor page
        mw.post(page, contrib, "added %d images" % anything)
    if ONE_A_TIME and anything:
        # NOTE(review): this exits before the UnsortedContrib text is
        # written or posted below -- confirm that is intended for -o.
        sys.exit(0)

if modified and WIKI_FILE:
    print("Writing data")
    with open(WIKI_FILE, "w") as _wiki_target:
        _wiki_target.write(data)

if modified and not NO_UPDATE and not WIKI_FILE:
    # Update UnsortedContrib page
    mw.post("UnsortedContrib", data,
            "cleared %d and sorted %d images" % (stats[0], stats[1]))