User:GrabberBot/mediawiki.py

From The Battle for Wesnoth Wiki
#!/usr/bin/env python
# encoding: UTF-8

import sys, urllib2, urllib, re

# Base URL for page requests; fetch() and post() append
# "/?title=<page>&action=..." to it.
wiki_url = "http://wesnoth.org/wiki"
# URL of the Special:Userlogin page; login() appends "&action=submitlogin".
login_url = "http://wesnoth.org/mw/index.php?title=Special:Userlogin"
# Alternative endpoints (English Wikipedia), left disabled:
#wiki_url = "http://en.wikipedia.org/wiki"
#login_url = "http://en.wikipedia.org/w/index.php?title=Special:Userlogin"

class MediaWikiError(Exception):
    """Raised when login fails or the edit form cannot be used."""


class MediaWiki:
    """Small screen-scraping client for reading and saving wiki pages.

    Session cookies are kept in ``self.cookies`` (one dict per instance) and
    persisted as ``key=value`` lines in ``cookie.txt`` in the current
    directory.  ``fetch()`` remembers the edit token and edit time of the
    page it loaded; ``post()`` sends them back when saving.

    The endpoints come from the module-level ``wiki_url`` and ``login_url``.
    """

    COOKIE_FILE = "cookie.txt"
    USER_AGENT = "GrabberBot"

    # NOTE(review): credentials are embedded in the source (the password is
    # only obscured with hex escapes).  They remain the defaults so existing
    # callers keep working, but login() accepts explicit credentials; these
    # should move to configuration outside the code.
    DEFAULT_USER = "GrabberBot"
    DEFAULT_PASSWORD = "\x73\x61\x72\x75\x6d\x61\x6e"

    # Compiled once; the patterns match the HTML of the login and edit pages.
    _ERROR_RE = re.compile("<p class='error'>(.*?)</p>", re.S)
    _TOKEN_RE = re.compile("""<input type='hidden' value="(.*?)" name="wpEditToken" />""")
    _TIME_RE = re.compile("""<input type='hidden' value="(.*?)" name="wpEdittime" />""")
    _TEXT_RE = re.compile("""<textarea [^>]*?name="wpTextbox1"[^>]*?>(.*?)</textarea>""", re.S)

    def __init__(self):
        # Per-instance state: a class-level dict would be shared by every
        # MediaWiki object.
        self.cookies = {}
        self.token = None
        self.time = None
        self.read_cookies()

    def read_cookies(self):
        """Load cookies from the cookie file into ``self.cookies``.

        Lines without ``=`` are ignored.  A missing or unreadable file is
        not an error: the instance simply starts without stored cookies.
        """
        try:
            f = open(self.COOKIE_FILE)
            try:
                lines = f.read().splitlines()
            finally:
                f.close()
        except IOError:
            # Best effort: there is no cookie file before the first login.
            return
        for line in lines:
            kv = line.split("=", 1)
            if len(kv) == 2:
                self.cookies[kv[0]] = kv[1]

    def store_cookies(self):
        """Write ``self.cookies`` to the cookie file, one ``key=value`` per line."""
        f = open(self.COOKIE_FILE, "w")
        try:
            for key, value in self.cookies.items():
                f.write("%s=%s\n" % (key, value))
        finally:
            f.close()

    def request(self, url):
        """Return a ``urllib2.Request`` for *url* carrying the bot's headers.

        The User-Agent is always set; a Cookie header is added only when
        cookies are known.
        """
        req = urllib2.Request(url)
        req.add_header("User-Agent", self.USER_AGENT)
        if self.cookies:
            pairs = ["%s=%s" % (key, value) for key, value in self.cookies.items()]
            req.add_header("Cookie", "; ".join(pairs))
        return req

    def _remember_cookies(self, set_cookie_headers):
        """Record the name/value pair of each Set-Cookie header value.

        Attributes after the first ``;`` are dropped; headers without ``=``
        are skipped.
        """
        for header in set_cookie_headers:
            pair = header.split("=", 1)
            if len(pair) != 2:
                continue
            self.cookies[pair[0]] = pair[1].split(";", 1)[0]

    def _read(self, url):
        """Fetch *url* with the current cookies and return the response body."""
        site = urllib2.urlopen(self.request(url))
        try:
            return site.read()
        finally:
            site.close()

    def login(self, name=None, password=None):
        """Submit the login form and store the cookies the server hands out.

        name, password -- credentials; default to the built-in bot account.

        Returns ``(failed, message)``: ``(True, <error text>)`` when the
        response contains a ``<p class='error'>`` element, otherwise
        ``(False, "Ok")``.
        """
        print("logging in")
        if name is None:
            name = self.DEFAULT_USER
        if password is None:
            password = self.DEFAULT_PASSWORD

        request = self.request(login_url + "&action=submitlogin")
        form = urllib.urlencode({
            "wpName": name,
            "wpPassword": password,
            "wpLoginattempt": "Login",
            "wpRemember": "1",
            }, True)

        wiki = self

        class CookieRedirect(urllib2.HTTPRedirectHandler):
            # Cookies are picked up only from redirect responses; the
            # follow-up request is rebuilt so that it already carries them.
            def redirect_request(handler, req, fp, code, msg, hdrs, newurl):
                wiki._remember_cookies(hdrs.getheaders("set-cookie"))
                follow_up = wiki.request(newurl)
                wiki.store_cookies()
                return follow_up

        opener = urllib2.build_opener(CookieRedirect())
        site = opener.open(request, form)
        try:
            body = site.read()
        finally:
            site.close()

        mob = self._ERROR_RE.search(body)
        if mob:
            return (True, mob.group(1))
        return (False, "Ok")

    def fetch(self, page):
        """Return the edit-box text of *page*, logging in first if required.

        Stores the form's edit token and edit time on the instance for a
        later ``post()``.  The text is returned exactly as it appears in the
        HTML (entities are not decoded); an edit form without a textarea
        yields ``""``.

        Raises MediaWikiError when login fails or the edit form lacks the
        ``wpEditToken`` / ``wpEdittime`` fields.
        """
        url = wiki_url + "/?title=" + urllib.quote(page) + "&action=edit"
        contents = self._read(url)

        if "<title>Login required to edit" in contents:
            failed, message = self.login()
            if failed:
                print(message)
                raise MediaWikiError("Login failed")
            # Retry; the request now includes the freshly stored cookies.
            contents = self._read(url)

        token = self._TOKEN_RE.search(contents)
        edit_time = self._TIME_RE.search(contents)
        if token is None or edit_time is None:
            raise MediaWikiError(
                "edit form for %r has no wpEditToken/wpEdittime field" % (page,))
        self.token = token.group(1)
        self.time = edit_time.group(1)

        text = self._TEXT_RE.search(contents)
        if text:
            return text.group(1)
        return ""

    def post(self, page, text, comment):
        """Save *text* as the new content of *page* as a minor edit.

        comment -- the edit summary.

        Uses the token and edit time remembered by the last ``fetch()``.

        Returns False when the server answers with a redirect and True when
        it does not.  NOTE(review): the wiki presumably redirects to the
        page after a successful save, so True would mean "save failed",
        mirroring the error flag returned by login() -- confirm.

        Raises MediaWikiError when ``fetch()`` has not been called yet.
        """
        if self.token is None or self.time is None:
            raise MediaWikiError("fetch() must be called before post()")

        request = self.request(
            wiki_url + "/?title=" + urllib.quote(page) + "&action=submit")
        form = urllib.urlencode({
            "wpSave": "Save page",
            "wpSection": "",
            "wpSummary": comment,
            "wpEdittime": self.time,
            "wpTextbox1": text,
            "wpMinoredit": "1",
            "wpEditToken": self.token,
            }, True)

        class Redirected(Exception):
            pass

        class StopOnRedirect(urllib2.HTTPRedirectHandler):
            # The redirect itself is the signal; there is no need to follow it.
            def redirect_request(handler, req, fp, code, msg, hdrs, newurl):
                raise Redirected

        opener = urllib2.build_opener(StopOnRedirect())
        try:
            opener.open(request, form).close()
        except Redirected:
            return False
        return True

if __name__ == "__main__":
    # Command-line use: with exactly one argument, print the edit-box text
    # of that page; otherwise print a one-line usage hint.
    if len(sys.argv) != 2:
        print("%s %s" % (sys.argv[0], "page"))
    else:
        wiki = MediaWiki()
        print(wiki.fetch(sys.argv[1]))
This page was last edited on 1 November 2005, at 13:22.