User:GrabberBot/mediawiki.py

From The Battle for Wesnoth Wiki
< User:GrabberBot
Revision as of 20:25, 19 August 2005 by Allefant (talk | contribs) (pasted code)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
#!/usr/bin/env python
# encoding: UTF-8

import sys, urllib2, urllib, re

# Endpoints of the wiki the MediaWiki class talks to: the base URL used for
# page requests and the URL of the login form.
wiki_url = "http://wesnoth.org/wiki"
login_url = "http://wesnoth.org/mw/index.php?title=Special:Userlogin"

# Alternative target (English Wikipedia); swap with the pair above to use it.
# wiki_url = "http://en.wikipedia.org/wiki"
# login_url = "http://en.wikipedia.org/w/index.php?title=Special:Userlogin"

class MediaWiki(object):
    """Small screen-scraping client for reading and saving MediaWiki pages.

    Session cookies live in ``self.cookies`` and are persisted to a plain
    text file (one ``name=value`` per line) so a login survives between runs.
    The module-level ``wiki_url`` and ``login_url`` select the wiki to use.

    Targets Python 2 (``urllib2``); needs 2.6 or later.
    """

    # NOTE(security): the account credentials are hard-coded in the source.
    # They are kept byte-for-byte so behaviour does not change, but they
    # should move to configuration that is neither checked in nor published.
    username = "GrabberBot"
    password = "\x73\x61\x72\x75\x6d\x61\x6e"

    # Default cookie store, relative to the current working directory.
    cookie_file = "cookie.txt"

    # Class-level fallback only; __init__ gives every instance its own dict
    # so that two clients never share (and clobber) one cookie jar.
    cookies = {}

    # NOTE(review): the original pattern was lost when the wiki rendered the
    # HTML tags inside the string literal; only "(.*?)" and re.S survived.
    # This reconstruction assumes the error is shown in a paragraph with
    # class "error" -- confirm against the login page of the target wiki.
    _LOGIN_ERROR_RE = re.compile(r"""<p class=['"]error['"]>(.*?)</p>""", re.S)

    # Hidden fields and text area of the edit form.
    _EDIT_TOKEN_RE = re.compile("""<input type='hidden' value="(.*?)" name="wpEditToken" />""")
    _EDIT_TIME_RE = re.compile("""<input type='hidden' value="(.*?)" name="wpEdittime" />""")
    _TEXTBOX_RE = re.compile("""<textarea [^>]*?name="wpTextbox1"[^>]*?>(.*?)</textarea>""", re.S)

    def __init__(self, cookie_file="cookie.txt"):
        """Create a client and load cookies saved by a previous run.

        cookie_file -- path of the cookie store (default "cookie.txt").
        """
        self.cookie_file = cookie_file
        self.cookies = {}
        self.read_cookies()

    def read_cookies(self):
        """Merge the cookies stored in the cookie file into self.cookies.

        Lines without "=" are skipped.  A missing or unreadable file is not
        an error; the cookies are simply left as they are.
        """
        try:
            with open(self.cookie_file) as f:
                lines = f.read().split("\n")
        except IOError:
            return
        for line in lines:
            name, sep, value = line.partition("=")
            if sep:
                self.cookies[name] = value

    def store_cookies(self):
        """Write self.cookies to the cookie file, one name=value per line."""
        with open(self.cookie_file, "w") as f:
            f.writelines("%s=%s\n" % item for item in self.cookies.items())

    def request(self, url):
        """Return a urllib2.Request for url carrying the bot's headers.

        Sets the "GrabberBot" User-Agent and, when any cookies are known, a
        Cookie header listing all of them.
        """
        req = urllib2.Request(url)
        # For debugging through a local proxy:
        #   req.set_proxy("localhost:8080", "http")
        req.add_header("User-Agent", "GrabberBot")
        if self.cookies:
            pairs = ["%s=%s" % item for item in self.cookies.items()]
            req.add_header("Cookie", "; ".join(pairs))
        return req

    def login(self):
        """Log in with the configured account and keep the session cookies.

        Cookies are taken from the Set-Cookie headers of the redirect that
        answers the login POST, stored in self.cookies and written to the
        cookie file.

        Returns a tuple (failed, message): (True, <error text found in the
        response>) when the response matches the login-error pattern,
        otherwise (False, "Ok").
        """
        print("logging in")
        client = self

        class CookieRedirect(urllib2.HTTPRedirectHandler):
            """Record cookies set by a redirect, then follow it."""

            def redirect_request(self, req, fp, code, msg, hdrs, newurl):
                for header in hdrs.getheaders("set-cookie"):
                    name, sep, rest = header.partition("=")
                    if not sep:
                        continue  # malformed header without "=": skip it
                    # Keep only the value; drop attributes such as path.
                    client.cookies[name] = rest.split(";", 1)[0]
                client.store_cookies()
                # Build the follow-up request ourselves so that it carries
                # the cookies that were just received.
                return client.request(newurl)

        form = urllib.urlencode({
            "wpName": self.username,
            "wpPassword": self.password,
            "wpLoginattempt": "Login",
            "wpRemember": "1",
        }, True)
        opener = urllib2.build_opener(CookieRedirect())
        response = opener.open(
            self.request(login_url + "&action=submitlogin"), form)
        try:
            body = response.read()
        finally:
            response.close()

        found = self._LOGIN_ERROR_RE.search(body)
        if found:
            return (True, found.group(1))
        return (False, "Ok")

    def _read_edit_form(self, page):
        """Return the HTML of the edit form for page."""
        # NOTE: page is inserted into the URL verbatim, so callers must pass
        # a title that is already URL-safe.
        response = urllib2.urlopen(
            self.request(wiki_url + "/?title=" + page + "&action=edit"))
        try:
            return response.read()
        finally:
            response.close()

    def _form_field(self, pattern, html, field):
        """Return group 1 of pattern in html, or raise if it is absent."""
        found = pattern.search(html)
        if found is None:
            raise RuntimeError("edit form has no %s field" % field)
        return found.group(1)

    def fetch(self, page):
        """Return the text-area content of page's edit form.

        Logs in first when the wiki answers with its "Login required to
        edit" page.  The form's edit token and edit time are remembered in
        self.token and self.time for a later post().

        The text is returned exactly as it appears between the textarea
        tags (presumably still HTML-escaped -- verify before posting it
        back).  Returns "" when the form has no text area.

        Raises RuntimeError when the login fails or when the edit form lacks
        the wpEditToken / wpEdittime fields.
        """
        html = self._read_edit_form(page)
        if "<title>Login required to edit" in html:
            failed, message = self.login()
            if failed:
                print(message)
                # String exceptions are rejected by Python 2.6+, so raise a
                # real exception object.
                raise RuntimeError("Login failed")
            html = self._read_edit_form(page)

        self.token = self._form_field(self._EDIT_TOKEN_RE, html, "wpEditToken")
        self.time = self._form_field(self._EDIT_TIME_RE, html, "wpEdittime")

        found = self._TEXTBOX_RE.search(html)
        if found:
            return found.group(1)
        return ""

    def post(self, page, text, comment):
        """Submit text as the new content of page, flagged as a minor edit.

        fetch() must have been called before, because the edit token and
        edit time it stored are sent along with the form.

        page    -- page title, inserted into the URL verbatim
        text    -- new page text
        comment -- edit summary

        Returns False when the server answers the submission with a redirect
        and True when it does not.  (Presumably the redirect means the save
        was accepted, which would match login()'s convention of True
        signalling failure -- confirm against the target wiki.)
        """
        req = self.request(wiki_url + "/?title=" + page + "&action=submit")
        form = urllib.urlencode({
            "wpSave": "Save page",
            "wpSection": "",
            "wpSummary": comment,
            "wpEdittime": self.time,
            "wpTextbox1": text,
            "wpMinoredit": "1",
            "wpEditToken": self.token,
        }, True)

        class Redirected(Exception):
            """Raised by the handler to stop instead of following."""

        class StopOnRedirect(urllib2.HTTPRedirectHandler):
            """Turn any redirect into a Redirected exception."""

            def redirect_request(self, req, fp, code, msg, hdrs, newurl):
                raise Redirected

        opener = urllib2.build_opener(StopOnRedirect())
        try:
            response = opener.open(req, form)
        except Redirected:
            return False
        response.close()
        return True

if __name__ == "__main__":
    # Command-line use: "<script> page" prints what fetch() returns for that
    # page; any other argument count prints a one-line usage hint.
    if len(sys.argv) != 2:
        print("%s page" % sys.argv[0])
    else:
        wiki = MediaWiki()
        # To try saving as well, post the fetched text back, e.g.
        #   wiki.post(sys.argv[1], text + "\ntest", "test")
        print(wiki.fetch(sys.argv[1]))