User:GrabberBot/cleaner.py

From The Battle for Wesnoth Wiki
< User:GrabberBot
Revision as of 20:24, 19 August 2005 by Allefant (talk | contribs) (pasted code)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
#!/usr/bin/python

import urllib, urllib2, sys, re, os
import mediawiki

# Remote script that returns the pending commands, one per line.
url3 = "http://allefant.sourceforge.net/wesnoth/graphiclibrary.py?acquire=1"


def parse_args(argv):
    """Parse the command line into the script's option settings.

    argv is the complete argument list with the program name at index 0
    (i.e. sys.argv).  Arguments that are not recognised are ignored.

    Recognised options:
      -i         set IMG_TAGS
      -n         set NO_UPDATE
      -c FILE    store FILE in COMMANDS_FILE
      -w FILE    store FILE in WIKI_FILE
      -v         set VERBOSE
      -o         set ONE_A_TIME
      -a         set MOVE_ALL

    Returns a dict keyed by the setting names listed above.

    Exits via SystemExit with an error message when -c or -w is the last
    argument, i.e. when the file name is missing.
    """
    options = {
        "IMG_TAGS": False,
        "NO_UPDATE": False,
        "COMMANDS_FILE": "",
        "WIKI_FILE": "",
        "VERBOSE": False,
        "ONE_A_TIME": False,
        "MOVE_ALL": False,
    }
    switches = {
        "-i": "IMG_TAGS",
        "-n": "NO_UPDATE",
        "-v": "VERBOSE",
        "-o": "ONE_A_TIME",
        "-a": "MOVE_ALL",
    }
    with_value = {
        "-c": "COMMANDS_FILE",
        "-w": "WIKI_FILE",
    }

    # Start at 1: argv[0] is the program name and must not be read as an
    # option.
    pos = 1
    while pos < len(argv):
        arg = argv[pos]
        if arg in switches:
            options[switches[arg]] = True
        elif arg in with_value:
            if pos + 1 >= len(argv):
                sys.exit("%s: option %s requires a file name" % (argv[0], arg))
            options[with_value[arg]] = argv[pos + 1]
            # The value has been consumed; do not parse it as an option.
            pos += 1
        pos += 1
    return options


_options = parse_args(sys.argv)
IMG_TAGS = _options["IMG_TAGS"]
NO_UPDATE = _options["NO_UPDATE"]
COMMANDS_FILE = _options["COMMANDS_FILE"]
WIKI_FILE = _options["WIKI_FILE"]
VERBOSE = _options["VERBOSE"]
ONE_A_TIME = _options["ONE_A_TIME"]
MOVE_ALL = _options["MOVE_ALL"]

if COMMANDS_FILE:
    # -c was given: take the commands from a local file, one per line.
    # open() replaces the file() constructor and the handle is closed
    # explicitly rather than left to the garbage collector.
    command_source = open(COMMANDS_FILE)
    try:
        commands = command_source.read().splitlines()
    finally:
        command_source.close()
else:
    # read commands from the remote script
    command_source = urllib2.urlopen(url3)
    try:
        commands = command_source.read().splitlines()
    finally:
        command_source.close()
    if not commands:
        # Nothing was requested, so there is nothing to do.
        sys.exit(0)

# Reversed so that the last (most recent) command is seen first and thereby
# has priority.
commands.reverse()

mw = mediawiki.MediaWiki()

if WIKI_FILE:
    # -w was given: work on a local copy of the page.  The handle is closed
    # explicitly rather than left to the garbage collector.
    wiki_source = open(WIKI_FILE)
    try:
        data = wiki_source.read()
    finally:
        wiki_source.close()
else:
    # read current contents of wiki page
    data = mw.fetch("UnsortedContrib")

# stats[0] counts deleted entries, stats[1] counts moved entries.
stats = [0, 0]
# Maps a contributor name to the list of image URLs to move to their page.
moves = {}
# Becomes 1 as soon as at least one entry was deleted or moved.
modified = 0

def get_name(str):
    """Return str reduced to its letters a-z/A-Z, with the first one upper-cased.

    Everything else (spaces, digits, punctuation, ...) is dropped so that the
    result is safe to use in a wiki page name: a jinxed name would redirect to
    the homepage and deface the wiki.

    Raises IndexError when str contains no such letter at all.
    """
    lowest, highest = ord("a"), ord("z")
    letters = [ch for ch in str if lowest <= ord(ch.lower()) <= highest]
    cleaned = "".join(letters)
    return cleaned[0].upper() + cleaned[1:]

# Link text of an entry: "* [http://wesnoth.org/wiki/...Contrib <name>]".
_ENTRY_NAME_RE = re.compile("""\\* \\[http://wesnoth.org/wiki/.*?Contrib (.*?)\\]""")
# Image URL of an entry, either as an <img> tag on a line of its own (-i) ...
_ENTRY_IMG_TAG_RE = re.compile("""^<img src="(http://.*)" />$""", re.M)
# ... or as a bare URL between two pipe characters.
_ENTRY_IMG_URL_RE = re.compile("\\| (http://.*) \\|")


def repl(mob):
    """Decide the fate of one page entry; meant as callback for re.sub().

    mob is the match object of one entry.  The contributor name and the image
    URL are extracted from the matched text and compared with the global
    command list:

    * "delete <image>" removes the entry and increments stats[0].
    * "move <name> <image>" (or any command while MOVE_ALL is set) records
      the image in moves[name] and increments stats[1]; the entry is removed
      unless NO_UPDATE is set.

    Either action sets the global flag modified to 1.

    Returns the replacement text: "" to remove the entry, or the matched text
    itself to keep it.  Text from which no name or no image URL can be
    extracted is kept as it is instead of aborting the run.
    """
    global modified
    item = mob.group(0)

    name_match = _ENTRY_NAME_RE.search(item)
    if IMG_TAGS:
        image_match = _ENTRY_IMG_TAG_RE.search(item)
    else:
        image_match = _ENTRY_IMG_URL_RE.search(item)
    if name_match is None or image_match is None:
        # Not an entry in the expected format: leave it alone.
        return item

    name = name_match.group(1)
    image = image_match.group(1)
    if VERBOSE:
        # Single parenthesised expression: prints the same text as
        # "print name, image" under Python 2.
        print("%s %s" % (name, image))

    # Commands are compared with their spaces written as %20.  Apply the
    # same escaping to the expected text, otherwise a name or URL that
    # contains a literal space could never match.
    delete_command = ("delete " + image).replace(" ", "%20")
    move_command = ("move " + name + " " + image).replace(" ", "%20")

    # NOTE(review): with MOVE_ALL set only the first command is ever
    # examined (the entry is moved unless that command deletes it), and
    # nothing is moved when the command list is empty - confirm intended.
    for com in commands:
        com = com.replace(" ", "%20")
        if com == delete_command:
            modified = 1
            stats[0] += 1
            if VERBOSE:
                print(" delete")
            return ""
        elif com == move_command or MOVE_ALL:
            modified = 1
            stats[1] += 1
            moves.setdefault(name, []).append(image)
            if VERBOSE:
                print(" move")
            if not NO_UPDATE:
                return ""
            # NO_UPDATE: the move is recorded but the entry stays on the page.
            break
    # return unmodified
    return item

# Hand every "* [http://..." entry of the page (up to and including the end
# of its line) to repl(), whose return value replaces the entry.
entry_pattern = re.compile("\\* \\[http://.*?\n", re.S)
data = entry_pattern.sub(repl, data)

# Captures the file name (last path component) of a picture link.
_PICTURE_RE = re.compile(".*/(.+\\.(?:png|gif|bmp|jpg))")


def check_already(name, link, art_dir="/home/elias/prog/python/wesnoth/glib/art"):
    """Tell whether the picture behind link is already stored for name.

    name    -- sub-directory of art_dir that is searched.
    link    -- URL of the picture; only its file name is used.
    art_dir -- root directory of the local picture collection.

    Returns True when <art_dir>/<name>/<file name> can be opened for
    reading, False otherwise.  A link that does not end in a file name with
    a .png, .gif, .bmp or .jpg extension cannot be in the collection, so
    False is returned for it as well.
    """
    mob = _PICTURE_RE.search(link)
    if mob is None:
        return False
    pic = mob.group(1)
    path = "%s/%s/%s" % (art_dir, name, pic)
    try:
        handle = open(path)
    except IOError:
        return False
    # Only the existence check matters; do not leak the handle.
    handle.close()
    return True

# Matches the first heading line ("== ... ==") that is followed by an empty
# line; new image links are inserted right after that match.
first_heading = re.compile("\\s*==.*?== *\n\n")

# Add the collected images to the wiki page of each contributor.
for move in moves:
    anything = 0  # number of images added to this contributor's page
    name = get_name(move)
    if VERBOSE:
        print(name)
    contrib = mw.fetch(name + "Contrib")

    if contrib.strip() == "":  # doesn't exist yet
        contrib = "== %s ==\n\n\n== See Also ==\n\n[[GraphicLibrary]]" % move
        # Log the new artist.  The log is closed right away so the entry is
        # written out and no handle is left open per iteration.
        miss = open("missing_artists.txt", "a")
        try:
            miss.write(name + "\n")
        finally:
            miss.close()
        if VERBOSE:
            print(" creating new page")

    # Sanity check: the first line of the page must mention the contributor
    # (full name or at least its first word); otherwise the page is not
    # touched.
    firstline = contrib.split("\n", 1)[0].lower()
    if move.lower() not in firstline and \
            move.split(" ")[0].lower() not in firstline:
        print("Right page? %s not in \"%s\"" % (move, firstline))
        contrib = ""
    if VERBOSE:
        print(" page %s for %s" % ((name + "Contrib"), move))

    mob = first_heading.search(contrib)
    if mob:
        newcontrib = contrib[:mob.end(0)]
        for pic in moves[move]:
            if not check_already(name, pic):
                newcontrib += pic + "\n"
                anything += 1
                if VERBOSE:
                    print(" add: %s" % pic)
            else:
                if VERBOSE:
                    print(" already: %s" % pic)
        newcontrib += contrib[mob.end(0):]
        contrib = newcontrib
    else:
        print("Could not add %s for %s." % (moves[move], name))

    if anything and not NO_UPDATE:
        # Update contributor page
        mw.post(name + "Contrib", contrib, "added %d images" % anything)

    if ONE_A_TIME and anything:
        # -o: stop after the first contributor that received new images.
        sys.exit(0)

if modified and WIKI_FILE:
    # Working on a local copy (-w): write the cleaned page back to it.  The
    # file is closed explicitly so the data is flushed without relying on
    # garbage collection.
    print("Writing data")
    wiki_out = open(WIKI_FILE, "w")
    try:
        wiki_out.write(data)
    finally:
        wiki_out.close()

if modified and not NO_UPDATE and not WIKI_FILE:
    # Update UnsortedContrib page
    mw.post("UnsortedContrib", data, "cleared %d and sorted %d images" %
        (stats[0], stats[1]))