User:GrabberBot/mediawiki.py
From The Battle for Wesnoth Wiki
#!/usr/bin/env python
# encoding: UTF-8
import sys, urllib2, urllib, re
# Base URL of the wiki; fetch() and post() append "/?title=<page>&action=..."
# to it.
wiki_url = "http://wesnoth.org/wiki"
# URL of the login form; login() appends "&action=submitlogin" to it.
login_url = "http://wesnoth.org/mw/index.php?title=Special:Userlogin"
# Alternative endpoints (English Wikipedia), currently disabled:
#wiki_url = "http://en.wikipedia.org/wiki"
#login_url = "http://en.wikipedia.org/w/index.php?title=Special:Userlogin"
class MediaWiki:
    """Small MediaWiki client: log in, fetch a page's edit text, save a page.

    Session cookies are kept in ``self.cookies`` and persisted as
    ``name=value`` lines in ``cookie.txt`` in the current working directory.
    ``fetch()`` must be called before ``post()`` because it records the edit
    token and edit time that ``post()`` submits.
    """

    # Class-level default kept so ``MediaWiki.cookies`` still exists; every
    # instance gets its own dict in __init__ so cookies are never shared
    # between instances through this mutable class attribute.
    cookies = {}

    def __init__(self):
        """Create a client and load any cookies saved by an earlier run."""
        self.cookies = {}
        self.read_cookies()

    def read_cookies(self):
        """Load ``name=value`` lines from ``cookie.txt`` into ``self.cookies``.

        A missing or unreadable file is silently ignored (best effort).
        Lines that contain no "=" are skipped.
        """
        try:
            f = open("cookie.txt")
            try:
                text = f.read()
            finally:
                f.close()
        except IOError:
            return
        for line in text.split("\n"):
            # Split on the first "=" only, so values may contain "=".
            kv = line.split("=", 1)
            if len(kv) == 2:
                self.cookies[kv[0]] = kv[1]

    def store_cookies(self):
        """Write ``self.cookies`` to ``cookie.txt``, one ``name=value`` per line."""
        f = open("cookie.txt", "w")
        try:
            for name in self.cookies:
                f.write("%s=%s\n" % (name, self.cookies[name]))
        finally:
            # Close explicitly so the data is flushed before anyone re-reads it.
            f.close()

    def request(self, url):
        """Return a ``urllib2.Request`` for *url* with the bot's headers.

        The request carries the "GrabberBot" User-Agent and, when any cookies
        are stored, a Cookie header built from ``self.cookies``.
        """
        request = urllib2.Request(url)
        #request.set_proxy("localhost:8080", "http")
        request.add_header("User-Agent", "GrabberBot")
        cookies = "".join(
            "%s=%s; " % (name, self.cookies[name]) for name in self.cookies)
        if cookies:
            request.add_header("Cookie", cookies)
        return request

    def _read(self, url):
        """Open *url* with the bot's headers and return the response body."""
        site = urllib2.urlopen(self.request(url))
        try:
            return site.read()
        finally:
            site.close()

    def login(self):
        """Submit the login form and remember the cookies the server sets.

        Returns a ``(failed, message)`` tuple: ``(True, <error text>)`` when
        the response contains a ``<p class='error'>`` element, otherwise
        ``(False, "Ok")``.
        """
        print("logging in")
        request = self.request(login_url + "&action=submitlogin")
        # NOTE(review): the account name and password are hard-coded in the
        # source; consider loading them from a private config file instead.
        data = {
            "wpName": "GrabberBot",
            "wpPassword": "\x73\x61\x72\x75\x6d\x61\x6e",
            "wpLoginattempt": "Login",
            "wpRemember": "1"
        }
        data = urllib.urlencode(data, True)
        client = self

        class redir(urllib2.HTTPRedirectHandler):
            # Cookies arrive on the redirect response, so capture them here
            # and follow the redirect with a request that already sends them.
            def redirect_request(self2, req, fp, code, msg, hdrs, newurl):
                for h in hdrs.getheaders("set-cookie"):
                    s = h.split("=", 1)
                    if len(s) != 2:
                        # Malformed header without "=": nothing to store.
                        continue
                    # Keep only the value, dropping attributes after ";".
                    client.cookies[s[0]] = s[1].split(";", 1)[0]
                r = client.request(newurl)
                client.store_cookies()
                return r

        opener = urllib2.build_opener(redir())
        site = opener.open(request, data)
        try:
            c = site.read()
        finally:
            site.close()
        mob = re.compile("<p class='error'>(.*?)</p>", re.S).search(c)
        if mob:
            return (True, mob.group(1))
        return (False, "Ok")

    def fetch(self, page):
        """Return the edit-box text of *page*, logging in first if required.

        *page* is inserted into the URL verbatim, so the caller must pass it
        already URL-encoded. As a side effect the edit token and edit time
        from the edit form are stored in ``self.token`` and ``self.time`` for
        a later ``post()``. The text is returned exactly as it appears inside
        the ``<textarea>`` (no HTML unescaping is done); an empty string is
        returned when there is no such textarea.

        Raises:
            RuntimeError: the wiki demanded a login and the login failed.
            AttributeError: the edit form lacks the wpEditToken or wpEdittime
                hidden field.
        """
        edit_url = wiki_url + "/?title=" + page + "&action=edit"
        contents = self._read(edit_url)
        if contents.find("<title>Login required to edit") >= 0:
            failed, message = self.login()
            if failed:
                print(message)
                # String exceptions ("raise 'text'") are rejected by
                # Python 2.6+, so raise a real exception object.
                raise RuntimeError("Login failed")
            contents = self._read(edit_url)
        mob = re.compile("""<input type='hidden' value="(.*?)" name="wpEditToken" />""").search(contents)
        self.token = mob.group(1)
        mob = re.compile("""<input type='hidden' value="(.*?)" name="wpEdittime" />""").search(contents)
        self.time = mob.group(1)
        mob = re.compile("""<textarea [^>]*?name="wpTextbox1"[^>]*?>(.*?)</textarea>""", re.S).search(contents)
        if mob:
            return mob.group(1)
        return ""

    def post(self, page, text, comment):
        """Save *text* as the new content of *page* as a minor edit.

        Uses ``self.token`` and ``self.time`` recorded by a preceding
        ``fetch()``; *comment* is sent as the edit summary.

        Returns ``False`` when the server answers with a redirect and ``True``
        when it answers without one.
        NOTE(review): presumably a redirect means the save was accepted, so
        the result reads as an error flag like ``login()``'s - confirm.
        """
        request = self.request(wiki_url + "/?title=" + page + "&action=submit")
        data = {
            "wpSave": "Save page",
            "wpSection": "",
            "wpSummary": comment,
            "wpEdittime": self.time,
            "wpTextbox1": text,
            "wpMinoredit": "1",
            "wpEditToken": self.token}
        data = urllib.urlencode(data, True)

        class ok(Exception):
            """Raised by the handler below to signal that a redirect arrived."""

        class redir(urllib2.HTTPRedirectHandler):
            # Do not follow the redirect; just report that one was received.
            def redirect_request(self2, req, fp, code, msg, hdrs, newurl):
                raise ok

        opener = urllib2.build_opener(redir())
        try:
            opener.open(request, data).close()
        except ok:
            return False
        return True
if __name__ == "__main__":
    # Command-line use: "<script> page" prints the edit-box text of that page.
    if len(sys.argv) != 2:
        # Wrong argument count: show the expected invocation.
        print("%s page" % sys.argv[0])
    else:
        page_name = sys.argv[1]
        client = MediaWiki()
        page_text = client.fetch(page_name)
        # To try saving instead:
        # client.post(page_name, page_text + "\ntest", "test")
        print(page_text)