User:GrabberBot/mediawiki.py
From The Battle for Wesnoth Wiki
#!/usr/bin/env python
# encoding: UTF-8
"""Minimal MediaWiki client used by GrabberBot.

Provides the MediaWiki class, which can log in, download the wikitext of a
page through its edit form and submit a new revision.  Session cookies are
persisted in a small ``key=value`` text file between runs.

Run as a script with a page title to print that page's edit-box contents.
"""

import re
import sys
import urllib

try:
    import urllib2
    _urlencode = urllib.urlencode
except ImportError:
    # Python 3: urllib2 became urllib.request, urlencode moved to urllib.parse.
    import urllib.parse
    import urllib.request as urllib2
    _urlencode = urllib.parse.urlencode

wiki_url = "http://wesnoth.org/wiki"
login_url = "http://wesnoth.org/mw/index.php?title=Special:Userlogin"
# Alternative target, kept for reference:
#wiki_url = "http://en.wikipedia.org/wiki"
#login_url = "http://en.wikipedia.org/w/index.php?title=Special:Userlogin"

# Patterns used to scrape the HTML returned by the wiki.
_LOGIN_ERROR_RE = re.compile("<p class='error'>(.*?)</p>", re.S)
_EDIT_TOKEN_RE = re.compile(
    """<input type='hidden' value="(.*?)" name="wpEditToken" />""")
_EDIT_TIME_RE = re.compile(
    """<input type='hidden' value="(.*?)" name="wpEdittime" />""")
_TEXTBOX_RE = re.compile(
    """<textarea [^>]*?name="wpTextbox1"[^>]*?>(.*?)</textarea>""", re.S)


class MediaWikiError(Exception):
    """Base class for errors raised by this module."""


class LoginError(MediaWikiError):
    """Raised by MediaWiki.fetch() when the automatic login attempt fails."""


def _read_text(response):
    """Read a whole HTTP response, close it, and return its body as ``str``.

    On Python 2 the body is returned unchanged.  On Python 3 the bytes are
    decoded as UTF-8 (assumed to be the wiki's output encoding).
    """
    try:
        body = response.read()
    finally:
        response.close()
    if not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    return body


def _header_values(headers, name):
    """Return every value of header *name* from a Python 2 or 3 message."""
    getheaders = getattr(headers, "getheaders", None)
    if getheaders is not None:
        return getheaders(name)
    return headers.get_all(name) or []


class MediaWiki(object):
    """Cookie-aware client for one MediaWiki installation.

    Attributes:
        cookie_file: path of the file used to persist cookies.
        cookies: dict mapping cookie names to values, sent with each request.
        token: ``wpEditToken`` of the most recently fetched edit form.
        time: ``wpEdittime`` of the most recently fetched edit form.
    """

    def __init__(self, cookie_file="cookie.txt"):
        """Create a client and load any cookies saved in *cookie_file*."""
        self.cookie_file = cookie_file
        # Per-instance jar: a class-level dict would be shared by all clients.
        self.cookies = {}
        self.token = None
        self.time = None
        self.read_cookies()

    def read_cookies(self):
        """Load ``key=value`` lines from the cookie file into self.cookies.

        A missing or unreadable file is ignored; lines without ``=`` are
        skipped.
        """
        try:
            with open(self.cookie_file) as f:
                for line in f:
                    key, sep, value = line.rstrip("\n").partition("=")
                    if sep:
                        self.cookies[key] = value
        except IOError:
            pass

    def store_cookies(self):
        """Write self.cookies to the cookie file, one ``key=value`` per line."""
        with open(self.cookie_file, "w") as f:
            for key in self.cookies:
                f.write("%s=%s\n" % (key, self.cookies[key]))

    def request(self, url):
        """Return a Request for *url* carrying the bot's User-Agent and cookies."""
        request = urllib2.Request(url)
        #request.set_proxy("localhost:8080", "http")
        request.add_header("User-Agent", "GrabberBot")
        cookie_header = "; ".join(
            "%s=%s" % (key, self.cookies[key]) for key in self.cookies)
        if cookie_header:
            request.add_header("Cookie", cookie_header)
        return request

    def login(self):
        """Submit the login form and remember the cookies the wiki sets.

        Cookies are captured from the ``Set-Cookie`` headers of the redirect
        that answers the login POST, then saved with store_cookies().

        Returns:
            ``(True, message)`` if the returned page contains an error
            paragraph, otherwise ``(False, "Ok")``.
        """
        print("logging in")
        request = self.request(login_url + "&action=submitlogin")
        # NOTE(review): credentials are hard-coded in the source; consider
        # loading them from configuration or the environment instead.
        form = {
            "wpName": "GrabberBot",
            "wpPassword": "\x73\x61\x72\x75\x6d\x61\x6e",
            "wpLoginattempt": "Login",
            "wpRemember": "1"}
        payload = _urlencode(form, True).encode("ascii")
        client = self

        class CookieRedirectHandler(urllib2.HTTPRedirectHandler):
            """Harvest cookies from the redirect, then follow it with them."""

            def redirect_request(self, req, fp, code, msg, hdrs, newurl):
                for header in _header_values(hdrs, "set-cookie"):
                    key, sep, rest = header.partition("=")
                    if not sep:
                        # Malformed header without a value; nothing to store.
                        continue
                    client.cookies[key] = rest.split(";", 1)[0]
                follow_up = client.request(newurl)
                client.store_cookies()
                return follow_up

        opener = urllib2.build_opener(CookieRedirectHandler())
        body = _read_text(opener.open(request, payload))
        error = _LOGIN_ERROR_RE.search(body)
        if error:
            return (True, error.group(1))
        return (False, "Ok")

    def fetch(self, page):
        """Download the edit form of *page* and return its textarea contents.

        If the wiki answers with its "Login required to edit" page, login()
        is called and the form is requested once more.  As a side effect
        self.token and self.time are set for a later post().

        NOTE(review): the text is returned exactly as it appears in the HTML
        (entities are not unescaped), and *page* is inserted into the URL
        without quoting.

        Raises:
            LoginError: if the login attempt reports an error.
            MediaWikiError: if the edit form lacks the token or time field.
        """
        edit_url = wiki_url + "/?title=" + page + "&action=edit"
        contents = _read_text(urllib2.urlopen(self.request(edit_url)))
        if "<title>Login required to edit" in contents:
            failed, message = self.login()
            if failed:
                print(message)
                raise LoginError("Login failed")
            # Build a fresh request so it carries the new session cookies.
            contents = _read_text(urllib2.urlopen(self.request(edit_url)))
        return self._parse_edit_form(contents)

    def _parse_edit_form(self, contents):
        """Extract token, edit time and text from edit-form HTML *contents*.

        Sets self.token and self.time; returns the textarea contents, or ""
        when the page has no ``wpTextbox1`` textarea.
        """
        token = _EDIT_TOKEN_RE.search(contents)
        edit_time = _EDIT_TIME_RE.search(contents)
        if token is None or edit_time is None:
            raise MediaWikiError(
                "edit form is missing the wpEditToken or wpEdittime field")
        self.token = token.group(1)
        self.time = edit_time.group(1)
        textbox = _TEXTBOX_RE.search(contents)
        return textbox.group(1) if textbox else ""

    def post(self, page, text, comment):
        """Submit *text* as a minor edit of *page* with summary *comment*.

        Uses the token and edit time recorded by the last fetch().

        Returns:
            False if the server answered the POST with a redirect, True if it
            returned a page directly.  Presumably a redirect means the save
            succeeded, so True signals a problem, matching the error-flag
            convention of login() - confirm against the target wiki.

        Raises:
            MediaWikiError: if no edit form has been fetched yet.
        """
        if self.token is None or self.time is None:
            raise MediaWikiError("fetch() must be called before post()")
        request = self.request(wiki_url + "/?title=" + page + "&action=submit")
        form = {
            "wpSave": "Save page",
            "wpSection": "",
            "wpSummary": comment,
            "wpEdittime": self.time,
            "wpTextbox1": text,
            "wpMinoredit": "1",
            "wpEditToken": self.token}
        payload = _urlencode(form, True).encode("ascii")

        class Redirected(Exception):
            """Signals that the server tried to redirect the POST."""

        class AbortOnRedirect(urllib2.HTTPRedirectHandler):
            """Stop at the first redirect instead of following it."""

            def redirect_request(self, req, fp, code, msg, hdrs, newurl):
                raise Redirected

        opener = urllib2.build_opener(AbortOnRedirect())
        try:
            response = opener.open(request, payload)
        except Redirected:
            return False
        response.close()
        return True


if __name__ == "__main__":
    if len(sys.argv) == 2:
        mw = MediaWiki()
        site = mw.fetch(sys.argv[1])
        print(site)
        #mw.post(sys.argv[1], site + "\ntest", "test")
    else:
        print("%s page" % sys.argv[0])