Automated merge with http://code.pediapress.com/hg/mwlib
author heiko@brainbot.com
Thu Jul 03 17:11:44 2008 +0200 (12 months ago)
changeset 1127 f18354b68a9a
parent 1126 a0e399347e61
parent 1125 60428f1271d7
child 1128 a5f5d8cf71a4
Automated merge with http://code.pediapress.com/hg/mwlib
     1.1 --- a/MANIFEST.in	Thu Jul 03 17:11:35 2008 +0200
     1.2 +++ b/MANIFEST.in	Thu Jul 03 17:11:44 2008 +0200
     1.3 @@ -63,6 +63,7 @@
     1.4  include mwlib/serve.py
     1.5  include mwlib/snippets.py
     1.6  include mwlib/snippets.txt
     1.7 +include mwlib/tagext.py
     1.8  include mwlib/texmap.py
     1.9  include mwlib/timeline.py
    1.10  include mwlib/uparser.py
    1.11 @@ -97,6 +98,7 @@
    1.12  include tests/test_sanitychecker.py
    1.13  include tests/test_scanner.py
    1.14  include tests/test_table.py
    1.15 +include tests/test_tagext.py
    1.16  include tests/test_timeline.py
    1.17  include tests/test_utils.py
    1.18  include tests/test_xhtmlwriter.py
     2.1 --- a/docs/metabook.txt	Thu Jul 03 17:11:35 2008 +0200
     2.2 +++ b/docs/metabook.txt	Thu Jul 03 17:11:44 2008 +0200
     2.3 @@ -74,6 +74,10 @@
     2.4  
     2.5      Unique name of source, e.g. "Wikipedia (en)"
     2.6  
     2.7 +language (string)
     2.8 +
     2.9 +    2-character ISO code of language, e.g. "en"
    2.10 +
    2.11  
    2.12  License
    2.13  -------
     3.1 --- a/mwlib/cdb.py	Thu Jul 03 17:11:35 2008 +0200
     3.2 +++ b/mwlib/cdb.py	Thu Jul 03 17:11:44 2008 +0200
     3.3 @@ -48,7 +48,7 @@
     3.4      def close(self):
     3.5          self.map.close()
     3.6  
     3.7 -    def __iter__(self, fn=None):
     3.8 +    def __iter__(self):
     3.9          len = 2048
    3.10          while len < self.eod:
    3.11              klen, vlen = struct.unpack("<LL", self.map[len:len+8])
    3.12 @@ -57,37 +57,25 @@
    3.13              len += klen
    3.14              val = self.map[len:len+vlen]
    3.15              len += vlen
    3.16 -            if fn:
    3.17 -                yield fn(key, val)
    3.18 -            else:
    3.19 -                yield (key, val)
    3.20 +            yield (key, val)
    3.21  
    3.22      def iteritems(self):
    3.23          return self.__iter__()
    3.24  
    3.25      def iterkeys(self):
    3.26 -        return self.__iter__(lambda k,v: k)
    3.27 +        return (k for k, v in self)
    3.28  
    3.29      def itervalues(self):
    3.30 -        return self.__iter__(lambda k,v: v)
    3.31 +        return (v for k, v in self)
    3.32  
    3.33      def items(self):
    3.34 -        ret = []
    3.35 -        for i in self.iteritems():
    3.36 -            ret.append(i)
    3.37 -        return ret
    3.38 +        return list(self.iteritems())
    3.39  
    3.40      def keys(self):
    3.41 -        ret = []
    3.42 -        for i in self.iterkeys():
    3.43 -            ret.append(i)
    3.44 -        return ret
    3.45 +        return list(self.iterkeys())
    3.46  
    3.47      def values(self):
    3.48 -        ret = []
    3.49 -        for i in self.itervalues():
    3.50 -            ret.append(i)
    3.51 -        return ret
    3.52 +        return list(self.itervalues())
    3.53  
    3.54      def findstart(self):
    3.55          self.loop = 0
     4.1 --- a/mwlib/cdbwiki.py	Thu Jul 03 17:11:35 2008 +0200
     4.2 +++ b/mwlib/cdbwiki.py	Thu Jul 03 17:11:44 2008 +0200
     4.3 @@ -8,206 +8,129 @@
     4.4  import zlib
     4.5  import re
     4.6  
     4.7 -from mwlib import cdb
     4.8 -
     4.9 -try:
    4.10 -    from xml.etree import cElementTree
    4.11 -except ImportError:
    4.12 -    import cElementTree
    4.13 -
    4.14 -ns = '{http://www.mediawiki.org/xml/export-0.3/}'
    4.15 -
    4.16 -wikiindex = "wikiidx"
    4.17 -wikidata = "wikidata.bin"
    4.18 -
    4.19 -
    4.20 +from mwlib import cdb, dumpparser
    4.21  
    4.22  def normname(name):
    4.23      name = name.strip().replace("_", " ")
    4.24      name = name[:1].upper()+name[1:]
    4.25      return name
    4.26  
    4.27 -class Tags:
    4.28 -    page = ns + 'page'
    4.29 +class ZCdbWriter(cdb.CdbMake):
    4.30 +    def __init__(self, indexpath, datapath=None):
    4.31 +        if not datapath:
    4.32 +            datapath = indexpath + 'data.bin'
    4.33 +            indexpath = indexpath + 'idx.cdb'
    4.34  
    4.35 -    # <title> inside <page>
    4.36 -    title = ns + 'title'
    4.37 +        cdb.CdbMake.__init__(self, open(indexpath, 'wb'))
    4.38 +        self.data = open(datapath, 'wb')
    4.39  
    4.40 -    # <revision> inside <page>
    4.41 -    revision = ns + 'revision'
    4.42 +    def add(self, key, val):
    4.43 +        key = key.encode("utf-8")
    4.44 +        val = zlib.compress(val.encode('utf-8')) # NOTE: encode wasn't in original
    4.45 +        pos = self.data.tell()
    4.46 +        self.data.write(val)
    4.47 +        cdb.CdbMake.add(self, key, "%s %s" % (pos, len(val)))
    4.48  
    4.49 -    # <id> inside <revision>
    4.50 -    revid = ns + 'id'
    4.51 +    def finish(self):
    4.52 +        cdb.CdbMake.finish(self)
    4.53 +        self.data.close()
    4.54  
    4.55 -    # <contributor><username> inside <revision>
    4.56 -    username = ns + 'contributor/' + ns + 'username'
    4.57  
    4.58 -    # <text> inside <revision>
    4.59 -    text = ns + 'text'
    4.60 +class ZCdbReader(cdb.Cdb):
    4.61 +    def __init__(self, indexpath, datapath=None):
    4.62 +        if not datapath:
    4.63 +            datapath = indexpath + 'data.bin'
    4.64 +            indexpath = indexpath + 'idx.cdb'
    4.65  
    4.66 -    # <timestamp> inside <revision>
    4.67 -    timestamp = ns + 'timestamp'
    4.68 +        cdb.Cdb.__init__(self, open(indexpath, 'rb'))
    4.69 +        self.datapath = datapath
    4.70  
    4.71 -    # <revision><text> inside <page>
    4.72 -    revision_text = ns + 'revision/' + ns + 'text'
    4.73 +    def __getitem__(self, key):
    4.74 +        key = key.encode("utf-8")
    4.75 +        data = cdb.Cdb.__getitem__(self, key) # may raise KeyError 
    4.76 +        return self._readz(data)
    4.77  
    4.78 -    siteinfo = ns + "siteinfo"
    4.79 +    def _readz(self, data):
    4.80 +        pos, len = map(int, data.split())
    4.81 +        
    4.82 +        f=open(self.datapath, "rb")
    4.83 +        f.seek(pos)
    4.84 +        d=f.read(len)
    4.85 +        f.close()
    4.86 +        return zlib.decompress(d).decode('utf-8')
    4.87  
    4.88 -class DumpParser(object):
    4.89 -    category_ns = set(['category', 'kategorie'])
    4.90 -    image_ns = set(['image', 'bild'])
    4.91 -    template_ns = set(['template', 'vorlage'])
    4.92 -    wikipedia_ns = set(['wikipedia'])
    4.93 +    def iterkeys(self):
    4.94 +        return (k.decode('utf-8') for k in cdb.Cdb.iterkeys(self))
    4.95  
    4.96 -    tags = Tags()
    4.97 +    def iteritems(self):
    4.98 +        return ((k.decode('utf-8'), self._readz(v))
    4.99 +            for k,v in cdb.Cdb.iteritems(self))
   4.100  
   4.101 +    def itervalues(self):
   4.102 +        return (self._readz(v) for v in cdb.Cdb.itervalues(self))
   4.103  
   4.104 -    def __init__(self, xmlfilename):
   4.105 -        self.xmlfilename = xmlfilename
   4.106  
   4.107 -    def _write(self, msg):
   4.108 -        sys.stdout.write(msg)
   4.109 -        sys.stdout.flush()
   4.110 -
   4.111 -    def openInputStream(self):
   4.112 -        if self.xmlfilename.lower().endswith(".bz2"):
   4.113 -            f = os.popen("bunzip2 -c %s" % self.xmlfilename, "r")
   4.114 -        elif self.xmlfilename.lower().endswith(".7z"):
   4.115 -            f = os.popen("7z -so x %s" % self.xmlfilename, "r")
   4.116 +class BuildWiki():
   4.117 +    def __init__(self, dumpfile, outputdir, prefix='wiki'):
   4.118 +        if type(dumpfile) in (type(''), type(u'')):
   4.119 +            self.dumpParser = dumpparser.DumpParser(dumpfile)
   4.120          else:
   4.121 -            f = open(self.xmlfilename, "r")        
   4.122 -
   4.123 -        return f
   4.124 -
   4.125 -    def __call__(self):
   4.126 -        f = self.openInputStream()    
   4.127 -        
   4.128 -        count = 0
   4.129 -        for event, elem in cElementTree.iterparse(f):
   4.130 -            if elem.tag != self.tags.page:
   4.131 -                continue
   4.132 -            self.handlePageElement(elem)
   4.133 -            elem.clear()
   4.134 -            count += 1
   4.135 -            
   4.136 -            if count % 5000 == 0:
   4.137 -                self._write(" %s\n" % count)            
   4.138 -            elif count % 100 == 0:
   4.139 -                self._write(".")
   4.140 -
   4.141 -    
   4.142 -    def handlePageElement(self, page):
   4.143 -        title = page.find(self.tags.title).text
   4.144 -        revisions = page.findall(self.tags.revision)
   4.145 -        if not revisions:
   4.146 -            return
   4.147 -        revision = revisions[-1]
   4.148 -        
   4.149 -        texttag = revision.find(self.tags.text)
   4.150 -        timestamptag = revision.find(self.tags.timestamp)
   4.151 -        revision.clear()
   4.152 -        
   4.153 -        if texttag is not None:
   4.154 -            text = texttag.text
   4.155 -            texttag.clear()
   4.156 -        else:
   4.157 -            text = None
   4.158 -            
   4.159 -        if timestamptag is not None:
   4.160 -            timestamp = timestamptag.text
   4.161 -            timestamptag.clear()
   4.162 -        else:
   4.163 -            timestamp = None
   4.164 -        
   4.165 -        if not text:
   4.166 -            return
   4.167 -
   4.168 -        if isinstance(title, str):
   4.169 -            title = unicode(title)
   4.170 -        if isinstance(text, str):
   4.171 -            text = unicode(text)
   4.172 -
   4.173 -            
   4.174 -        if ':' in title:
   4.175 -            ns, rest = title.split(':', 1)
   4.176 -            ns = ns.lower()
   4.177 -            if ns not in self.template_ns:
   4.178 -                return
   4.179 -            self.handleTemplate(rest, text, timestamp)
   4.180 -        else:
   4.181 -            self.handleArticle(title, text, timestamp)
   4.182 -
   4.183 -    def handleArticle(self, title, text, timestamp):
   4.184 -        print "ART:", repr(title), len(text), timestamp
   4.185 -
   4.186 -    def handleTemplate(self, title, text, timestamp):
   4.187 -        print "TEMPL:", repr(title), len(text), timestamp
   4.188 -
   4.189 -class BuildWiki(DumpParser):
   4.190 -    def __init__(self, xmlfilename, outputdir):
   4.191 -        DumpParser.__init__(self, xmlfilename)
   4.192 +            self.dumpParser = dumpfile
   4.193 +        self.output_path = os.path.join(outputdir, prefix)
   4.194          self.outputdir = outputdir
   4.195          
   4.196      def __call__(self):
   4.197          if not os.path.exists(self.outputdir):
   4.198              os.makedirs(self.outputdir)
   4.199          
   4.200 -        n = os.path.join(self.outputdir, wikiindex)
   4.201 -        out = open(os.path.join(self.outputdir, wikidata), "wb")
   4.202 -        self.out = out
   4.203 -        f = open(n+'.cdb', 'wb')
   4.204 -        c = cdb.CdbMake(f)
   4.205 -        self.cdb = c
   4.206 +        self.writer = ZCdbWriter(self.output_path)
   4.207  
   4.208 -        DumpParser.__call__(self)
   4.209 -        c.finish()
   4.210 -        f.close()
   4.211 +        count = 0
   4.212 +        for page in self.dumpParser:
   4.213 +            if page.namespace == dumpparser.NS_MAIN:
   4.214 +                self.handleArticle(page.title, page.text, page.timestamp)
   4.215 +            elif page.namespace == dumpparser.NS_TEMPLATE:
   4.216 +                self.handleTemplate(page.title, page.text, page.timestamp)
   4.217 +            else:
   4.218 +                self.handleOther(page.title, page.text, page.timestamp)
   4.219  
   4.220 +            count += 1
   4.221 +            if count % 5000 == 0:
   4.222 +                self._write(" %s\n" % count)
   4.223 +            elif count % 100 == 0:
   4.224 +                self._write(".")
   4.225 +            
   4.226 +        self.writer.finish()
   4.227  
   4.228 -    def _writeobj(self, key, val):
   4.229 -        key = key.encode("utf-8")
   4.230 -        val = zlib.compress(val)
   4.231 -        pos = self.out.tell()
   4.232 -        self.out.write(val)
   4.233 -        self.cdb.add(key, "%s %s" % (pos, len(val)))
   4.234 +    def _write(self, msg):
   4.235 +        sys.stdout.write(msg)
   4.236 +        sys.stdout.flush()
   4.237  
   4.238      def handleArticle(self, title, text, timestamp):
   4.239 -        self._writeobj(u":"+title, text.encode("utf-8"))
   4.240 +        self.writer.add(u":"+title, text)
   4.241  
   4.242      def handleTemplate(self, title, text, timestamp):
   4.243 -        self._writeobj(u"T:"+title, text.encode("utf-8"))
   4.244 +        self.writer.add(u"T:"+title, text)
   4.245 +
   4.246 +    def handleOther(self, title, text, timestamp):
   4.247 +        self.writer.add(title, text)
   4.248      
   4.249  
   4.250  
   4.251  class WikiDB(object):
   4.252      redirect_rex = re.compile(r'^#Redirect:?\s*?\[\[(?P<redirect>.*?)\]\]', re.IGNORECASE)
   4.253  
   4.254 -    def __init__(self, dir):
   4.255 +    def __init__(self, dir, prefix='wiki'):
   4.256          self.dir = dir
   4.257 -        self.obj2pos_path = os.path.join(self.dir, wikidata)
   4.258 -        self.cdb = cdb.Cdb(open(os.path.join(self.dir, wikiindex+'.cdb'), 'rb'))
   4.259 -
   4.260 -    def _readobj(self, key):
   4.261 -        key = key.encode("utf-8")
   4.262 -
   4.263 -        try:
   4.264 -            data = self.cdb[key]  
   4.265 -        except KeyError:
   4.266 -            return None
   4.267 -
   4.268 -        pos, len = map(int, data.split())
   4.269 -        
   4.270 -        f=open(self.obj2pos_path, "rb")
   4.271 -        f.seek(pos)
   4.272 -        d=f.read(len)
   4.273 -        f.close()
   4.274 -        return zlib.decompress(d)
   4.275 +        self.reader = ZCdbReader(os.path.join(self.dir, prefix))
   4.276  
   4.277      def getRawArticle(self, title, raw=None, revision=None):
   4.278          title = normname(title)
   4.279 -        res = self._readobj(":"+title)
   4.280 -        if res is None:
   4.281 -            return  None
   4.282 +        print repr(title)
   4.283 +        try:
   4.284 +            res = self.reader[":"+title]
   4.285 +        except KeyError:
   4.286 +            return None
   4.287  
   4.288          res = unicode(res, 'utf-8')
   4.289          mo = self.redirect_rex.search(res)
   4.290 @@ -224,9 +147,10 @@
   4.291              title = title.split(':', 1)[1]
   4.292  
   4.293          title = normname(title)
   4.294 -        res = unicode(self._readobj(u"T:"+title) or "", 'utf-8')
   4.295 -        if not res:
   4.296 -            return res
   4.297 +        try:
   4.298 +            res = self.reader["T:"+title]
   4.299 +        except KeyError:
   4.300 +            return ''
   4.301  
   4.302          mo = self.redirect_rex.search(res)
   4.303          if mo:
   4.304 @@ -237,7 +161,12 @@
   4.305  
   4.306  
   4.307      def articles(self):
   4.308 -        for k, v in self.cdb:
   4.309 -            if k[0]==':':
   4.310 -                k = unicode(k[1:], "utf-8")
   4.311 -                yield k
   4.312 +        return (k[1:]
   4.313 +                for k in self.reader.iterkeys()
   4.314 +                if k[0] == ':')
   4.315 +
   4.316 +    def article_texts(self):
   4.317 +        return ((k[1:], v)
   4.318 +                for k in self.reader.iteritems()
   4.319 +                if k[0] == ':')
   4.320 +        
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/mwlib/dumpparser.py	Thu Jul 03 17:11:44 2008 +0200
     5.3 @@ -0,0 +1,210 @@
     5.4 +import os
     5.5 +import re
     5.6 +
     5.7 +try:
     5.8 +    from xml.etree import cElementTree
     5.9 +except ImportError:
    5.10 +    import cElementTree
    5.11 +
    5.12 +ns = '{http://www.mediawiki.org/xml/export-0.3/}'
    5.13 +class Tags:
    5.14 +
    5.15 +    # <namespaces><namespace> inside <siteinfo>
    5.16 +    namespace = ns + 'namespaces/' + ns + 'namespace'
    5.17 +
    5.18 +    page = ns + 'page'
    5.19 +
    5.20 +    # <title> inside <page>
    5.21 +    title = ns + 'title'
    5.22 +
    5.23 +    # <revision> inside <page>
    5.24 +    revision = ns + 'revision'
    5.25 +
    5.26 +    # <id> inside <revision>
    5.27 +    revid = ns + 'id'
    5.28 +
    5.29 +    # <contributor><username> inside <revision>
    5.30 +    username = ns + 'contributor/' + ns + 'username'
    5.31 +
    5.32 +    # <text> inside <revision>
    5.33 +    text = ns + 'text'
    5.34 +
    5.35 +    # <timestamp> inside <revision>
    5.36 +    timestamp = ns + 'timestamp'
    5.37 +
    5.38 +    # <revision><text> inside <page>
    5.39 +    revision_text = ns + 'revision/' + ns + 'text'
    5.40 +
    5.41 +    siteinfo = ns + "siteinfo"
    5.42 +
    5.43 +NS_MEDIA          = -2
    5.44 +NS_SPECIAL        = -1
    5.45 +NS_MAIN           =  0
    5.46 +NS_TALK           =  1
    5.47 +NS_USER           =  2
    5.48 +NS_USER_TALK      =  3
    5.49 +NS_PROJECT        =  4
    5.50 +NS_PROJECT_TALK   =  5
    5.51 +NS_IMAGE          =  6
    5.52 +NS_IMAGE_TALK     =  7
    5.53 +NS_MEDIAWIKI      =  8
    5.54 +NS_MEDIAWIKI_TALK =  9
    5.55 +NS_TEMPLATE       = 10
    5.56 +NS_TEMPLATE_TALK  = 11
    5.57 +NS_HELP           = 12
    5.58 +NS_HELP_TALK      = 13
    5.59 +NS_CATEGORY       = 14
    5.60 +NS_CATEGORY_TALK  = 15
    5.61 +
    5.62 +class Page(object):
    5.63 +    __slots__ = [
    5.64 +        'title', 'pageid', 'namespace_text',
    5.65 +        'namespace',
    5.66 +        'revid', 'timestamp',
    5.67 +        'username', 'userid',
    5.68 +        'minor', 'comment', 'text'
    5.69 +    ]
    5.70 +
    5.71 +    def __init__(self):
    5.72 +        self.namespace_text = ''
    5.73 +        self.namespace = NS_MAIN
    5.74 +
    5.75 +    redirect_rex = re.compile(r'^#Redirect:?\s*?\[\[(?P<redirect>.*?)\]\]', re.IGNORECASE)
    5.76 +
    5.77 +    @property
    5.78 +    def redirect(self):
    5.79 +        mo = self.redirect_rex.search(self.text)
    5.80 +        if mo:
    5.81 +            return mo.group('redirect').split("|", 1)[0]
    5.82 +        return None
    5.83 +
    5.84 +    def __repr__(self):
    5.85 +        text = repr(self.text[:50])
    5.86 +        redir = self.redirect
    5.87 +        if redir:
    5.88 +            text = "Redirect to %s" % repr(redir)
    5.89 +        return 'Page(%s (@%s): %s)' % (repr(self.title), self.timestamp, text)
    5.90 +
    5.91 +
    5.92 +class DumpParser(object):
    5.93 +    namespaces = {
    5.94 +        'template': NS_TEMPLATE,
    5.95 +        'vorlage': NS_TEMPLATE,
    5.96 +        'category': NS_CATEGORY,
    5.97 +        'kategorie': NS_CATEGORY,
    5.98 +        'image': NS_IMAGE,
    5.99 +        'bild': NS_IMAGE,
   5.100 +        'wikipedia': NS_PROJECT,
   5.101 +    }
   5.102 +
   5.103 +    default_namespaces = [NS_MAIN, NS_TEMPLATE]
   5.104 +
   5.105 +    tags = Tags()
   5.106 +
   5.107 +    def __init__(self, xmlfilename,
   5.108 +                 namespace_filter=default_namespaces,
   5.109 +                 ignore_redirects=False):
   5.110 +        self.xmlfilename = xmlfilename
   5.111 +        self.namespace_filter = namespace_filter
   5.112 +        self.ignore_redirects = ignore_redirects
   5.113 +
   5.114 +    def openInputStream(self):
   5.115 +        if self.xmlfilename.lower().endswith(".bz2"):
   5.116 +            f = os.popen("bunzip2 -c %s" % self.xmlfilename, "r")
   5.117 +        elif self.xmlfilename.lower().endswith(".7z"):
   5.118 +            f = os.popen("7z -so x %s" % self.xmlfilename, "r")
   5.119 +        else:
   5.120 +            f = open(self.xmlfilename, "r")        
   5.121 +
   5.122 +        return f
   5.123 +
   5.124 +    @staticmethod
   5.125 +    def getTag(elem):
   5.126 +        # rough is good enough
   5.127 +        return elem.tag[elem.tag.rindex('}')+1:]
   5.128 +
   5.129 +    def handleSiteinfo(self, siteinfo):
   5.130 +        for nsElem in siteinfo.findall(self.tags.namespace):
   5.131 +            try:
   5.132 +                self.namespaces[nsElem.text.lower()] = int(nsElem.get('key'))
   5.133 +            except AttributeError:
   5.134 +                # text is probably None
   5.135 +                pass
   5.136 +        
   5.137 +    def __iter__(self):
   5.138 +        f = self.openInputStream()    
   5.139 +        
   5.140 +        elemIter = (el for evt, el in cElementTree.iterparse(f))
   5.141 +        for elem in elemIter:
   5.142 +            if self.getTag(elem) == 'page':
   5.143 +                page = self.handlePageElement(elem)
   5.144 +                if page:
   5.145 +                    yield page
   5.146 +                elem.clear()
   5.147 +            elif self.getTag(elem) == 'siteinfo':
   5.148 +                self.handleSiteinfo(elem)
   5.149 +                elem.clear()
   5.150 +        
   5.151 +        f.close()
   5.152 +    
   5.153 +    def handlePageElement(self, pageElem):
   5.154 +        res = Page()
   5.155 +        lastRevision = None
   5.156 +        for el in pageElem:
   5.157 +            tag = self.getTag(el)
   5.158 +            if tag == 'title':
   5.159 +                title = unicode(el.text)
   5.160 +                if ':' in title:
   5.161 +                    ns, rest = title.split(':', 1)
   5.162 +                    res.namespace = self.namespaces.get(ns.lower(), NS_MAIN)
   5.163 +                    if res.namespace:
   5.164 +                        title = rest
   5.165 +                        res.namespace_text = ns
   5.166 +                res.title = title
   5.167 +                if res.namespace not in self.namespace_filter:
   5.168 +                    return None
   5.169 +
   5.170 +            elif tag == 'id':
   5.171 +                res.pageid = int(el.text)
   5.172 +
   5.173 +            elif tag == 'revision':
   5.174 +                lastRevision = el
   5.175 +
   5.176 +        if lastRevision:
   5.177 +            self.handleRevisionElement(lastRevision, res)
   5.178 +
   5.179 +        if self.ignore_redirects and res.redirect:
   5.180 +            return None
   5.181 +
   5.182 +        return res
   5.183 +
   5.184 +    def handleRevisionElement(self, revElem, res):
   5.185 +        for el in revElem:
   5.186 +            tag = self.getTag(el)
   5.187 +            if tag == 'id':
   5.188 +                res.revid = int(el.text)
   5.189 +            elif tag == 'timestamp':
   5.190 +                res.timestamp = el.text
   5.191 +            elif tag == 'contributor':
   5.192 +                pass
   5.193 +                #res.username, res.userid = self.handleContributorElement(el)
   5.194 +            elif tag == 'minor':
   5.195 +                res.minor = True
   5.196 +            elif tag == 'comment':
   5.197 +                res.comment = unicode(el.text)
   5.198 +            elif tag == 'text':
   5.199 +                res.text = unicode(el.text)
   5.200 +                el.clear()
   5.201 +
   5.202 +        return res
   5.203 +
   5.204 +    def handleContributorElement(self, conElem):
   5.205 +        username = None
   5.206 +        userid = None
   5.207 +        for el in conElem:
   5.208 +            if self.getTag(el) == 'username':
   5.209 +                username = unicode(el.text)
   5.210 +            elif self.getTag(el) == 'id':
   5.211 +                userid = int(el.text)
   5.212 +        return (username, userid)
   5.213 +
     6.1 --- a/mwlib/metabook.py	Thu Jul 03 17:11:35 2008 +0200
     6.2 +++ b/mwlib/metabook.py	Thu Jul 03 17:11:44 2008 +0200
     6.3 @@ -20,7 +20,7 @@
     6.4          metabook['subtitle'] = subtitle
     6.5      return metabook
     6.6  
     6.7 -def make_source(name=None, url=None):
     6.8 +def make_source(name=None, url=None, language=None):
     6.9      source = {
    6.10          'type': 'source',
    6.11          'system': 'MediaWiki',
    6.12 @@ -29,6 +29,8 @@
    6.13          source['name'] = name
    6.14      if url:
    6.15          source['url'] = url
    6.16 +    if language:
    6.17 +        source['language'] = language
    6.18      return source
    6.19  
    6.20  def make_article(title=None, displaytitle=None, content_type='text/x-wiki'):
     7.1 --- a/mwlib/mwapidb.py	Thu Jul 03 17:11:35 2008 +0200
     7.2 +++ b/mwlib/mwapidb.py	Thu Jul 03 17:11:44 2008 +0200
     7.3 @@ -416,6 +416,7 @@
     7.4          self.template_blacklist = []
     7.5          if template_blacklist is not None:
     7.6              self.setTemplateBlacklist(template_blacklist)
     7.7 +        self.source = None
     7.8      
     7.9      def setTemplateBlacklist(self, template_blacklist):
    7.10          raw = self.getRawArticle(template_blacklist)
    7.11 @@ -525,14 +526,18 @@
    7.12          except KeyError:
    7.13              return None
    7.14      
    7.15 -    def getMetaData(self):
    7.16 +    def getSource(self):
    7.17 +        if self.source is not None:
    7.18 +            return self.source
    7.19          result = self.api_helper.query(meta='siteinfo')
    7.20          try:
    7.21              g = result['general']
    7.22 -            return metabook.make_source(
    7.23 +            self.source = metabook.make_source(
    7.24                  url=g['base'],
    7.25                  name='%s (%s)' % (g['sitename'], g['lang']),
    7.26 +                language=g['lang'],
    7.27              )
    7.28 +            return self.source
    7.29          except KeyError:
    7.30              return None
    7.31      
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/mwlib/namespace.py	Thu Jul 03 17:11:44 2008 +0200
     8.3 @@ -0,0 +1,75 @@
     8.4 +from mwlib.namespace_langs import lang_ns_data as _lang_ns_data
     8.5 +
     8.6 +NS_MEDIA          = -2
     8.7 +NS_SPECIAL        = -1
     8.8 +NS_MAIN           =  0
     8.9 +NS_TALK           =  1
    8.10 +NS_USER           =  2
    8.11 +NS_USER_TALK      =  3
    8.12 +NS_PROJECT        =  4
    8.13 +NS_PROJECT_TALK   =  5
    8.14 +NS_IMAGE          =  6
    8.15 +NS_IMAGE_TALK     =  7
    8.16 +NS_MEDIAWIKI      =  8
    8.17 +NS_MEDIAWIKI_TALK =  9
    8.18 +NS_TEMPLATE       = 10
    8.19 +NS_TEMPLATE_TALK  = 11
    8.20 +NS_HELP           = 12
    8.21 +NS_HELP_TALK      = 13
    8.22 +NS_CATEGORY       = 14
    8.23 +NS_CATEGORY_TALK  = 15
    8.24 +
    8.25 +namespace_maps = {}
    8.26 +
    8.27 +def add_namespace_map(key, lang, project_name, extras={}):
    8.28 +    ns_data = _lang_ns_data[lang]
    8.29 +    res = dict(zip(ns_data, _lang_ns_data_keys))
    8.30 +    res[project_name] = NS_PROJECT
    8.31 +    res[ns_data[-1] % project_name] = NS_PROJECT_TALK
    8.32 +    res.update(extras)
    8.33 +    namespace_maps[key] = res
    8.34 +
    8.35 +_lang_ns_data_keys = [
    8.36 +    NS_TALK, NS_USER, NS_USER_TALK, NS_IMAGE, NS_IMAGE_TALK,
    8.37 +    NS_MEDIAWIKI, NS_MEDIAWIKI_TALK, NS_TEMPLATE, NS_TEMPLATE_TALK,
    8.38 +    NS_HELP, NS_HELP_TALK, NS_CATEGORY, NS_CATEGORY_TALK, NS_SPECIAL, NS_MEDIA
    8.39 +]
    8.40 +
    8.41 +add_namespace_map('enwiki', 'en', 'Wikipedia',
    8.42 +        {'Portal': 100, 'Portal_Talk': 101})
    8.43 +add_namespace_map('dewiki', 'de', 'Wikipedia',
    8.44 +        {'Portal': 100, 'Portal_Diskussion': 101})
    8.45 +for lang in _lang_ns_data:
    8.46 +    add_namespace_map('%s+en_mw' % lang, lang, 'MediaWiki', namespace_maps['enwiki'])
    8.47 +
    8.48 +namespace_maps['default'] = dict(namespace_maps['enwiki'].items() + namespace_maps['dewiki'].items())
    8.49 +
    8.50 +# external wikis:
    8.51 +
    8.52 +interwiki_map = {
    8.53 +    'wikipedia': 'wikipedia',
    8.54 +    'w': 'wikipedia',
    8.55 +    'wiktionary': 'wiktionary',
    8.56 +    'wikt': 'wiktionary',
    8.57 +    'wikinews': 'wikinews',
    8.58 +    'n': 'wikinews',
    8.59 +    'wikibooks': 'wikibooks',
    8.60 +    'b': 'wikibooks',
    8.61 +    'wikiquote': 'wikiquote',
    8.62 +    'q': 'wikiquote',
    8.63 +    'wikisource': 'wikisource',
    8.64 +    's': 'wikisource',
    8.65 +    'wikispecies': 'wikispecies',
    8.66 +    'species': 'wikispecies',
    8.67 +    'v': 'wikiversity',
    8.68 +    'wikimedia': 'wikimedia',
    8.69 +    'foundation': 'wikimedia',
    8.70 +    'commons': 'commons',
    8.71 +    'meta': 'meta',
    8.72 +    'm': 'meta',
    8.73 +    'incubator': 'incubator',
    8.74 +    'mw': 'mw',
    8.75 +    'mediazilla': 'mediazilla',
    8.76 +
    8.77 +    'wikitravel': 'wikitravel',
    8.78 +}
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/mwlib/namespace_langs.py	Thu Jul 03 17:11:44 2008 +0200
     9.3 @@ -0,0 +1,76 @@
     9.4 +lang_ns_data = {
     9.5 +'af': [u'Bespreking', u'Gebruiker', u'Gebruikerbespreking', u'Beeld', u'Beeldbespreking', u'MediaWiki', u'MediaWikibespreking', u'Sjabloon', u'Sjabloonbespreking', u'Hulp', u'Hulpbespreking', u'Kategorie', u'Kategoriebespreking', u'Spesiaal', u'Media', u'%sbespreking'],
     9.6 +'an': [u'Descusi\xf3n', u'Usuario', u'Descusi\xf3n_usuario', u'Imachen', u'Descusi\xf3n_imachen', u'MediaWiki', u'Descusi\xf3n_MediaWiki', u'Plantilla', u'Descusi\xf3n_plantilla', u'Aduya', u'Descusi\xf3n_aduya', u'Categor\xeda', u'Descusi\xf3n_categor\xeda', u'Espezial', u'Media', u'Descusi\xf3n_%s'],
     9.7 +'ar': [u'\u0646\u0642\u0627\u0634', u'\u0645\u0633\u062a\u062e\u062f\u0645', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u0645\u0633\u062a\u062e\u062f\u0645', u'\u0635\u0648\u0631\u0629', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u0635\u0648\u0631\u0629', u'\u0645\u064a\u062f\u064a\u0627\u0648\u064a\u0643\u064a', u'\u0646\u0642\u0627\u0634_\u0645\u064a\u062f\u064a\u0627\u0648\u064a\u0643\u064a', u'\u0642\u0627\u0644\u0628', u'\u0646\u0642\u0627\u0634_\u0642\u0627\u0644\u0628', u'\u0645\u0633\u0627\u0639\u062f\u0629', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u0645\u0633\u0627\u0639\u062f\u0629', u'\u062a\u0635\u0646\u064a\u0641', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u062a\u0635\u0646\u064a\u0641', u'\u062e\u0627\u0635', u'\u0645\u0644\u0641', u"\u0646\u0642\u0627\u0634' . '_%s"],
     9.8 +'az': [u'M\xfczakir\u0259', u'\u0130stifad\u0259\xe7i', u'\u0130stifad\u0259\xe7i_m\xfczakir\u0259si', u'\u015e\u0259kil', u'\u015e\u0259kil_m\xfczakir\u0259si', u'MediyaViki', u'MediyaViki_m\xfczakir\u0259si', u'\u015eablon', u'\u015eablon_m\xfczakir\u0259si', u'K\xf6m\u0259k', u'K\xf6m\u0259k_m\xfczakir\u0259si', u'Kateqoriya', u'Kateqoriya_m\xfczakir\u0259si', u'X\xfcsusi', u'Mediya', u'%s_m\xfczakir\u0259si'],
     9.9 +'ba': [u'\u0424\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u04a0\u0430\u0442\u043d\u0430\u0448\u044b\u0443\u0441\u044b', u'\u04a0\u0430\u0442\u043d\u0430\u0448\u044b\u0443\u0441\u044b_\u043c-\u043d_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u0420\u04d9\u0441\u0435\u043c', u'\u0420\u04d9\u0441\u0435\u043c_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'MediaWiki', u'MediaWiki_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u04a0\u0430\u043b\u044b\u043f', u'\u04a0\u0430\u043b\u044b\u043f_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u0411\u0435\u043b\u0435\u0448\u043c\u04d9', u'\u0411\u0435\u043b\u0435\u0448\u043c\u04d9_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u042f\u0440\u0499\u0430\u043c\u0441\u044b', u'\u041c\u0435\u0434\u0438\u0430', u'%s_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443'],
    9.10 +'be': [u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435', u'\u0423\u0434\u0437\u0435\u043b\u044c\u043d\u0456\u043a', u'\u0413\u0443\u0442\u0430\u0440\u043a\u0456_\u045e\u0434\u0437\u0435\u043b\u044c\u043d\u0456\u043a\u0430', u'\u0412\u044b\u044f\u0432\u0430', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u0432\u044b\u044f\u0432\u044b', u'MediaWiki', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_MediaWiki', u'\u0428\u0430\u0431\u043b\u0451\u043d', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u0448\u0430\u0431\u043b\u0451\u043d\u0443', u'\u0414\u0430\u043f\u0430\u043c\u043e\u0433\u0430', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u0434\u0430\u043f\u0430\u043c\u043e\u0433\u0456', u'\u041a\u0430\u0442\u044d\u0433\u043e\u0440\u044b\u044f', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u043a\u0430\u0442\u044d\u0433\u043e\u0440\u044b\u0456', u'\u0421\u043f\u044d\u0446\u044b\u044f\u043b\u044c\u043d\u044b\u044f', u'\u041c\u044d\u0434\u044b\u044f', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_%s'],
    9.11 +'bg': [u'\u0411\u0435\u0441\u0435\u0434\u0430', u'\u041f\u043e\u0442\u0440\u0435\u0431\u0438\u0442\u0435\u043b', u'\u041f\u043e\u0442\u0440\u0435\u0431\u0438\u0442\u0435\u043b_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041a\u0430\u0440\u0442\u0438\u043d\u043a\u0430', u'\u041a\u0430\u0440\u0442\u0438\u043d\u043a\u0430_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041c\u0435\u0434\u0438\u044f\u0423\u0438\u043a\u0438', u'\u041c\u0435\u0434\u0438\u044f\u0423\u0438\u043a\u0438_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0428\u0430\u0431\u043b\u043e\u043d_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041f\u043e\u043c\u043e\u0449', u'\u041f\u043e\u043c\u043e\u0449_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u0421\u043f\u0435\u0446\u0438\u0430\u043b\u043d\u0438', u'\u041c\u0435\u0434\u0438\u044f', u'%s_\u0431\u0435\u0441\u0435\u0434\u0430'],
    9.12 +'br': [u'Kaozeal', u'Implijer', u'Kaozeadenn_Implijer', u'Skeudenn', u'Kaozeadenn_Skeudenn', u'MediaWiki', u'Kaozeadenn_MediaWiki', u'Patrom', u'Kaozeadenn_Patrom',u'Skoazell', u'Kaozeadenn_Skoazell', u'Rummad', u'Kaozeadenn_Rummad', u'Dibar', u'Media', u'Kaozeadenn_%s'],
    9.13 +'bs': [u'Razgovor', u'Korisnik', u'Razgovor_sa_korisnikom', u'Slika', u'Razgovor_o_slici', u'MedijaViki', u'Razgovor_o_MedijaVikiju', u'\u0160ablon', u'Razgovor_o_\u0161ablonu', u'Pomo\u0107', u'Razgovor_o_pomo\u0107i', u'Kategorija', u'Razgovor_o_kategoriji', u'Posebno', u'Medija', u'Razgovor_{{grammar:instrumental|%s}}'],
    9.14 +'ca': [u'Discussi\xf3', u'Usuari', u'Usuari_Discussi\xf3', u'Imatge', u'Imatge_Discussi\xf3', u'MediaWiki', u'MediaWiki_Discussi\xf3', u'Plantilla', u'Plantilla_Discussi\xf3', u'Ajuda', u'Ajuda_Discussi\xf3', u'Categoria', u'Categoria_Discussi\xf3', u'Especial', u'Media', u'%s_Discussi\xf3'],
    9.15 +'cs': [u'Diskuse', u'U\u017eivatel', u'U\u017eivatel_diskuse', u'Soubor', u'Soubor_diskuse', u'MediaWiki', u'MediaWiki_diskuse', u'\u0160ablona', u'\u0160ablona_diskuse', u'N\xe1pov\u011bda', u'N\xe1pov\u011bda_diskuse', u'Kategorie', u'Kategorie_diskuse', u'Speci\xe1ln\xed', u'M\xe9dia', u'%s_diskuse'],
    9.16 +'cv': [u'\u0421\u04f3\u0442\u0441\u0435 \u044f\u0432\u0430\u0441\u0441\u0438', u'\u0425\u0443\u0442\u0448\u0103\u043d\u0430\u043a\u0430\u043d', u'\u0425\u0443\u0442\u0448\u0103\u043d\u0430\u043a\u0430\u043d\u0103\u043d_\u043a\u0430\u043d\u0430\u0448\u043b\u0443_\u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0438', u'\u04f2\u043a\u0435\u0440\u0447\u0115\u043a', u'\u04f2\u043a\u0435\u0440\u0447\u0115\u043a\u0435_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'MediaWiki', u'MediaWiki_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0428\u0430\u0431\u043b\u043e\u043d\u0430_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u041f\u0443\u043b\u0103\u0448\u0443', u'\u041f\u0443\u043b\u0103\u0448\u0103\u0432\u0430_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u043d\u0435_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u042f\u0442\u0430\u0440\u043b\u0103', u'\u041c\u0435\u0434\u0438\u0430', u'%s_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438'],
    9.17 +'cy': [u'Sgwrs', u'Defnyddiwr', u'Sgwrs_Defnyddiwr', u'Delwedd', u'Sgwrs_Delwedd', u'MediaWici', u'Sgwrs_MediaWici', u'Nodyn', u'Sgwrs_Nodyn', u'Cymorth', u'Sgwrs Cymorth', u'Categori', u'Sgwrs_Categori', u'Arbennig', u'Media', u'Sgwrs_%s'],
    9.18 +'da': [u'Diskussion', u'Bruger', u'Brugerdiskussion', u'Billede', u'Billeddiskussion', u'MediaWiki', u'MediaWiki-diskussion', u'Skabelon', u'Skabelondiskussion', u'Hj\xe6lp', u'Hj\xe6lp-diskussion', u'Kategori', u'Kategoridiskussion', u'Speciel', u'Media', u'%s-diskussion'],
    9.19 +'de': [u'Diskussion', u'Benutzer', u'Benutzer_Diskussion', u'Bild', u'Bild_Diskussion', u'MediaWiki', u'MediaWiki_Diskussion', u'Vorlage', u'Vorlage_Diskussion', u'Hilfe', u'Hilfe_Diskussion', u'Kategorie', u'Kategorie_Diskussion', u'Spezial', u'Media', u'%s_Diskussion'],
    9.20 +'el': [u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7', u'\u03a7\u03c1\u03ae\u03c3\u03c4\u03b7\u03c2', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03c7\u03c1\u03ae\u03c3\u03c4\u03b7', u'\u0395\u03b9\u03ba\u03cc\u03bd\u03b1', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03b5\u03b9\u03ba\u03cc\u03bd\u03b1\u03c2', u'MediaWiki', u'MediaWiki_talk', u'\u03a0\u03c1\u03cc\u03c4\u03c5\u03c0\u03bf', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03c0\u03c1\u03bf\u03c4\u03cd\u03c0\u03bf\u03c5', u'\u0392\u03bf\u03ae\u03b8\u03b5\u03b9\u03b1', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03b2\u03bf\u03ae\u03b8\u03b5\u03b9\u03b1\u03c2', u'\u039a\u03b1\u03c4\u03b7\u03b3\u03bf\u03c1\u03af\u03b1', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03ba\u03b1\u03c4\u03b7\u03b3\u03bf\u03c1\u03af\u03b1\u03c2', u'\u0395\u03b9\u03b4\u03b9\u03ba\u03cc', u'\u039c\u03ad\u03c3\u03bf\u03bd', u'%s_\u03c3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7'],
    9.21 +'en': [u'Talk', u'User', u'User_talk', u'Image', u'Image_talk', u'MediaWiki', u'MediaWiki_talk', u'Template', u'Template_talk', u'Help', u'Help_talk', u'Category', u'Category_talk', u'Special', u'Media', u'%s_talk'],
    9.22 +'eo': [u'Diskuto', u'Vikipediisto', u'Vikipediista_diskuto', u'Dosiero', u'Dosiera_diskuto', u'MediaWiki', u'MediaWiki_diskuto', u'\u015cablono', u'\u015cablona_diskuto', u'Helpo', u'Helpa_diskuto', u'Kategorio', u'Kategoria_diskuto', u'Speciala', u'Media', u'%s_diskuto'],
    9.23 +'es': [u'Discusi\xf3n', u'Usuario', u'Usuario_Discusi\xf3n', u'Imagen', u'Imagen_Discusi\xf3n', u'MediaWiki', u'MediaWiki_Discusi\xf3n', u'Plantilla', u'Plantilla_Discusi\xf3n', u'Ayuda', u'Ayuda_Discusi\xf3n', u'Categor\xeda', u'Categor\xeda_Discusi\xf3n', u'Especial', u'Media', u'%s_Discusi\xf3n'],
    9.24 +'et': [u'Arutelu', u'Kasutaja', u'Kasutaja_arutelu', u'Pilt', u'Pildi_arutelu', u'MediaWiki', u'MediaWiki_arutelu', u'Mall', u'Malli_arutelu', u'Juhend', u'Juhendi_arutelu', u'Kategooria', u'Kategooria_arutelu', u'Eri', u'Meedia', u'%s_arutelu'],
    9.25 +'eu': [u'Eztabaida', u'Lankide', u'Lankide_eztabaida', u'Irudi', u'Irudi_eztabaida', u'MediaWiki', u'MediaWiki_eztabaida', u'Txantiloi', u'Txantiloi_eztabaida', u'Laguntza', u'Laguntza_eztabaida', u'Kategoria', u'Kategoria_eztabaida', u'Aparteko', u'Media', u'%s_eztabaida'],
    9.26 +'fa': [u'\u0628\u062d\u062b', u'\u06a9\u0627\u0631\u0628\u0631', u'\u0628\u062d\u062b_\u06a9\u0627\u0631\u0628\u0631', u'\u062a\u0635\u0648\u06cc\u0631', u'\u0628\u062d\u062b_\u062a\u0635\u0648\u06cc\u0631', u'\u0645\u062f\u06cc\u0627\u0648\u06cc\u06a9\u06cc', u'\u0628\u062d\u062b_\u0645\u062f\u06cc\u0627\u0648\u06cc\u06a9\u06cc', u'\u0627\u0644\u06af\u0648', u'\u0628\u062d\u062b_\u0627\u0644\u06af\u0648', u'\u0631\u0627\u0647\u0646\u0645\u0627', u'\u0628\u062d\u062b_\u0631\u0627\u0647\u0646\u0645\u0627', u'\u0631\u062f\u0647', u'\u0628\u062d\u062b_\u0631\u062f\u0647', u'\u0648\u06cc\u0698\u0647', u'\u0645\u062f\u06cc\u0627', u'\u0628\u062d\u062b_%s'],
    9.27 +'fi': [u'Keskustelu', u'K\xe4ytt\xe4j\xe4', u'Keskustelu_k\xe4ytt\xe4j\xe4st\xe4', u'Kuva', u'Keskustelu_kuvasta', u'MediaWiki', u'MediaWiki_talk', u'Malline', u'Keskustelu_mallineesta', u'Ohje', u'Keskustelu_ohjeesta', u'Luokka', u'Keskustelu_luokasta', u'Toiminnot', u'Media', u'Keskustelu_{{grammar:elative|%s}}'],
    9.28 +'fo': [u'Kjak', u'Br\xfakari', u'Br\xfakari_kjak', u'Mynd', u'Mynd_kjak', u'MidiaWiki', u'MidiaWiki_kjak', u'Fyrimynd', u'Fyrimynd_kjak', u'Hj\xe1lp', u'Hj\xe1lp_kjak', u'B\xf3lkur', u'B\xf3lkur_kjak', u'Serstakur', u'Mi\xf0il', u'%s_kjak'],
    9.29 +'fr': [u'Discuter', u'Utilisateur', u'Discussion_Utilisateur', u'Image', u'Discussion_Image', u'MediaWiki', u'Discussion_MediaWiki', u'Mod\xe8le', u'Discussion_Mod\xe8le', u'Aide', u'Discussion_Aide', u'Cat\xe9gorie', u'Discussion_Cat\xe9gorie', u'Special', u'Media', u'Discussion_%s'],
    9.30 +'fy': [u'Oerlis', u'Meidogger', u'Meidogger_oerlis', u'Ofbyld', u'Ofbyld_oerlis', u'MediaWiki', u'MediaWiki_oerlis', u'Berjocht', u'Berjocht_oerlis', u'Hulp', u'Hulp_oerlis', u'Kategory', u'Kategory_oerlis', u'Wiki', u'Media', u'%s_oerlis'],
    9.31 +'ga': [u'Pl\xe9', u'\xdas\xe1ideoir', u'Pl\xe9_\xfas\xe1ideora', u'\xcdomh\xe1', u'Pl\xe9_\xedomh\xe1', u'MediaWiki', u'Pl\xe9_MediaWiki', u'Teimpl\xe9ad', u'Pl\xe9_teimpl\xe9id', u'Cabhair', u'Pl\xe9_cabhrach', u'Catag\xf3ir', u'Pl\xe9_catag\xf3ire', u'Speisialta', u'Me\xe1n', u'Pl\xe9_{{grammar:genitive|%s}}'],
    9.32 +'he': [u'\u05e9\u05d9\u05d7\u05d4', u'\u05de\u05e9\u05ea\u05de\u05e9', u'\u05e9\u05d9\u05d7\u05ea_\u05de\u05e9\u05ea\u05de\u05e9', u'\u05ea\u05de\u05d5\u05e0\u05d4', u'\u05e9\u05d9\u05d7\u05ea_\u05ea\u05de\u05d5\u05e0\u05d4', u'\u05de\u05d3\u05d9\u05d4_\u05d5\u05d9\u05e7\u05d9', u'\u05e9\u05d9\u05d7\u05ea_\u05de\u05d3\u05d9\u05d4_\u05d5\u05d9\u05e7\u05d9', u'\u05ea\u05d1\u05e0\u05d9\u05ea', u'\u05e9\u05d9\u05d7\u05ea_\u05ea\u05d1\u05e0\u05d9\u05ea', u'\u05e2\u05d6\u05e8\u05d4', u'\u05e9\u05d9\u05d7\u05ea_\u05e2\u05d6\u05e8\u05d4', u'\u05e7\u05d8\u05d2\u05d5\u05e8\u05d9\u05d4', u'\u05e9\u05d9\u05d7\u05ea_\u05e7\u05d8\u05d2\u05d5\u05e8\u05d9\u05d4', u'\u05de\u05d9\u05d5\u05d7\u05d3', u'\u05de\u05d3\u05d9\u05d4', u'\u05e9\u05d9\u05d7\u05ea_%s'],
    9.33 +'hi': [u'\u0935\u093e\u0930\u094d\u0924\u093e', u'\u0938\u0926\u0938\u094d\u092f', u'\u0938\u0926\u0938\u094d\u092f_\u0935\u093e\u0930\u094d\u0924\u093e', u'\u091a\u093f\u0924\u094d\u0930', u'\u091a\u093f\u0924\u094d\u0930_\u0935\u093e\u0930\u094d\u0924\u093e', u'MediaWiki', u'MediaWiki_talk', u'Template', u'Template_talk', u'Help', u'Help_Talk', u'\u0936\u094d\u0930\u0947\u0923\u0940', u'\u0936\u094d\u0930\u0947\u0923\u0940_\u0935\u093e\u0930\u094d\u0924\u093e', u'\u0935\u093f\u0936\u0947\u0937', u'Media', u'%s_\u0935\u093e\u0930\u094d\u0924\u093e'],
    9.34 +'hr': [u'Razgovor', u'Suradnik', u'Razgovor_sa_suradnikom', u'Slika', u'Razgovor_o_slici', u'MediaWiki', u'MediaWiki_razgovor', u'Predlo\u017eak', u'Razgovor_o_predlo\u0161ku', u'Pomo\u0107', u'Razgovor_o_pomo\u0107i', u'Kategorija', u'Razgovor_o_kategoriji', u'Posebno', u'Mediji', u'Razgovor_%s'],
    9.35 +'hu': [u'Vita', u'User', u'User_vita', u'K\xe9p', u'K\xe9p_vita', u'MediaWiki', u'MediaWiki_vita', u'Sablon', u'Sablon_vita', u'Seg\xedts\xe9g', u'Seg\xedts\xe9g_vita', u'Kateg\xf3ria', u'Kateg\xf3ria_vita', u'Speci\xe1lis', u'M\xe9dia', u'%s_vita'],
    9.36 +'ia': [u'Discussion', u'Usator', u'Discussion_Usator', u'Imagine', u'Discussion_Imagine', u'MediaWiki', u'Discussion_MediaWiki', u'Patrono', u'Discussion_Patrono', u'Adjuta', u'Discussion_Adjuta', u'Categoria', u'Discussion_Categoria', u'Special', u'Media', u'Discussion_%s'],
    9.37 +'id': [u'Pembicaraan', u'Pengguna', u'Pembicaraan_Pengguna', u'Berkas', u'Pembicaraan_Berkas', u'MediaWiki', u'Pembicaraan_MediaWiki', u'Templat', u'Pembicaraan_Templat', u'Bantuan', u'Pembicaraan_Bantuan', u'Kategori', u'Pembicaraan_Kategori', u'Istimewa', u'Media', u'Pembicaraan_%s'],
    9.38 +'is': [u'Spjall', u'Notandi', u'Notandaspjall', u'Mynd', u'Myndaspjall', u'Melding', u'Meldingarspjall', u'Sni\xf0', u'Sni\xf0aspjall', u'Hj\xe1lp', u'Hj\xe1lparspjall', u'Flokkur', u'Flokkaspjall', u'Kerfiss\xed\xf0a', u'Mi\xf0ill', u'%sspjall'],
    9.39 +'it': [u'Discussione', u'Utente', u'Discussioni_utente', u'Immagine', u'Discussioni_immagine', u'MediaWiki', u'Discussioni_MediaWiki', u'Template', u'Discussioni_template', u'Aiuto', u'Discussioni_aiuto', u'Categoria', u'Discussioni_categoria', u'Speciale', u'Media', u'Discussioni_%s'],
    9.40 +'ja': [u'\u30ce\u30fc\u30c8', u'\u5229\u7528\u8005', u'\u5229\u7528\u8005\u2010\u4f1a\u8a71', u'\u753b\u50cf', u'\u753b\u50cf\u2010\u30ce\u30fc\u30c8', u'MediaWiki', u'MediaWiki\u2010\u30ce\u30fc\u30c8', u'Template', u'Template\u2010\u30ce\u30fc\u30c8', u'Help', u'Help\u2010\u30ce\u30fc\u30c8', u'Category', u'Category\u2010\u30ce\u30fc\u30c8', u'\u7279\u5225', u'Media', u'%s\u2010\u30ce\u30fc\u30c8'],
    9.41 +'jv': [u'Dhiskusi', u'Panganggo', u'Dhiskusi_Panganggo', u'Gambar', u'Dhiskusi_Gambar', u'MediaWiki', u'Dhiskusi_MediaWiki', u'Cithakan', u'Dhiskusi_Cithakan', u'Pitulung', u'Dhiskusi_Pitulung', u'Kategori', u'Dhiskusi_Kategori', u'Astamiwa', u'Media', u'Dhiskusi_%s'],
    9.42 +'ka': [u'\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10db\u10dd\u10db\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d4\u10da\u10d8', u'\u10db\u10dd\u10db\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d4\u10da\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10e1\u10e3\u10e0\u10d0\u10d7\u10d8', u'\u10e1\u10e3\u10e0\u10d0\u10d7\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10db\u10d4\u10d3\u10d8\u10d0\u10d5\u10d8\u10d9\u10d8', u'\u10db\u10d4\u10d3\u10d8\u10d0\u10d5\u10d8\u10d9\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10d7\u10d0\u10e0\u10d2\u10d8', u'\u10d7\u10d0\u10e0\u10d2\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10d3\u10d0\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d0', u'\u10d3\u10d0\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d0_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10d9\u10d0\u10e2\u10d4\u10d2\u10dd\u10e0\u10d8\u10d0', u'\u10d9\u10d0\u10e2\u10d4\u10d2\u10dd\u10e0\u10d8\u10d0_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10e1\u10de\u10d4\u10ea\u10d8\u10d0\u10da\u10e3\u10e0\u10d8', u'\u10db\u10d4\u10d3\u10d8\u10d0', u'%s_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0'],
    9.43 +'kn': [u'\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6\u0caa\u0cc1\u0c9f', u'\u0cb8\u0ca6\u0cb8\u0ccd\u0caf', u'\u0cb8\u0ca6\u0cb8\u0ccd\u0caf\u0cb0_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6\u0caa\u0cc1\u0c9f', u'\u0c9a\u0cbf\u0ca4\u0ccd\u0cb0', u'\u0c9a\u0cbf\u0ca4\u0ccd\u0cb0_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6\u0caa\u0cc1\u0c9f', u'\u0cae\u0cc0\u0ca1\u0cbf\u0caf\u0cb5\u0cbf\u0c95\u0cbf', u'\u0cae\u0cc0\u0ca1\u0cc0\u0caf\u0cb5\u0cbf\u0c95\u0cbf_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0c9f\u0cc6\u0c82\u0caa\u0ccd\u0cb2\u0cc7\u0c9f\u0cc1', u'\u0c9f\u0cc6\u0c82\u0caa\u0ccd\u0cb2\u0cc7\u0c9f\u0cc1_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0cb8\u0cb9\u0cbe\u0caf', u'\u0cb8\u0cb9\u0cbe\u0caf_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0cb5\u0cb0\u0ccd\u0c97', u'\u0cb5\u0cb0\u0ccd\u0c97_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0cb5\u0cbf\u0cb6\u0cc7\u0cb7', u'\u0cae\u0cc0\u0ca1\u0cbf\u0caf', u'%s_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6'],
    9.44 +'ko': [u'\ud1a0\ub860', u'\uc0ac\uc6a9\uc790', u'\uc0ac\uc6a9\uc790\ud1a0\ub860', u'\uadf8\ub9bc', u'\uadf8\ub9bc\ud1a0\ub860', u'MediaWiki', u'MediaWiki\ud1a0\ub860', u'\ud2c0', u'\ud2c0\ud1a0\ub860', u'\ub3c4\uc6c0\ub9d0', u'\ub3c4\uc6c0\ub9d0\ud1a0\ub860', u'\ubd84\ub958', u'\ubd84\ub958\ud1a0\ub860', u'\ud2b9\uc218\uae30\ub2a5', u'Media', u'%s\ud1a0\ub860'],
    9.45 +'ku': [u'N\xeeqa\u015f', u'Bikarh\xeaner', u'Bikarh\xeaner_n\xeeqa\u015f', u'W\xeane', u'W\xeane_n\xeeqa\u015f', u'MediaWiki', u'MediaWiki_n\xeeqa\u015f', u'\u015eablon', u'\u015eablon_n\xeeqa\u015f', u'Al\xeekar\xee', u'Al\xeekar\xee_n\xeeqa\u015f', u'Kategor\xee', u'Kategor\xee_n\xeeqa\u015f', u'Taybet', u'Medya', u'%s_n\xeeqa\u015f'],
    9.46 +'la': [u'Disputatio', u'Usor', u'Disputatio_Usoris', u'Imago', u'Disputatio_Imaginis', u'MediaWiki', u'Disputatio_MediaWiki', u'Formula', u'Disputatio_Formulae', u'Auxilium', u'Disputatio_Auxilii', u'Categoria', u'Disputatio_Categoriae', u'Specialis', u'Media', u'Disputatio_{{grammar:genitive|%s}}'],
    9.47 +'li': [u'Euverlik', u'Gebroeker', u'Euverlik_gebroeker', u'Aafbeilding', u'Euverlik_afbeelding', u'MediaWiki', u'Euverlik_MediaWiki', u'Sjabloon', u'Euverlik_sjabloon', u'Help', u'Euverlik_help', u'Kategorie', u'Euverlik_kategorie', u'Speciaal', u'Media', u'Euverlik_%s'],
    9.48 +'lt': [u'Aptarimas', u'Naudotojas', u'Naudotojo_aptarimas', u'Vaizdas', u'Vaizdo_aptarimas', u'MediaWiki', u'MediaWiki_aptarimas', u'\u0160ablonas', u'\u0160ablono_aptarimas', u'Pagalba', u'Pagalbos_aptarimas', u'Kategorija', u'Kategorijos_aptarimas', u'Specialus', u'Medija', u'%s_aptarimas'],
    9.49 +'lv': [u'Diskusija', u'Lietot\u0101js', u'Lietot\u0101ja_diskusija', u'Att\u0113ls', u'Att\u0113la_diskusija', u'MediaWiki', u'MediaWiki_diskusija', u'Veidne', u'Veidnes_diskusija', u'Pal\u012bdz\u012bba', u'Pal\u012bdz\u012bbas_diskusija', u'Kategorija', u'Kategorijas_diskusija', u'Special', u'Media', u'{{grammar:\u0123enit\u012bvs|%s}}_diskusija'],
    9.50 +'mk': [u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440', u'\u041a\u043e\u0440\u0438\u0441\u043d\u0438\u043a', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0441\u043e_\u043a\u043e\u0440\u0438\u0441\u043d\u0438\u043a', u'\u0421\u043b\u0438\u043a\u0430', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u0441\u043b\u0438\u043a\u0430', u'\u041c\u0435\u0434\u0438\u0458\u0430\u0412\u0438\u043a\u0438', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u041c\u0435\u0434\u0438\u0458\u0430\u0412\u0438\u043a\u0438', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u0448\u0430\u0431\u043b\u043e\u043d', u'\u041f\u043e\u043c\u043e\u0448', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u043f\u043e\u043c\u043e\u0448', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0458\u0430', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0458\u0430', u'\u0421\u043f\u0435\u0446\u0438\u0458\u0430\u043b\u043d\u0438', u'\u041c\u0435\u0434\u0438\u0458\u0430', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_%s'],
    9.51 +'ms': [u'Perbualan', u'Pengguna', u'Perbualan_Pengguna', u'Imej', u'Imej_Perbualan', u'MediaWiki', u'MediaWiki_Perbualan', u'Templat', u'Perbualan_Templat', u'Bantuan', u'Perbualan_Bantuan', u'Kategori', u'Perbualan_Kategori', u'Istimewa', u'Media', u'Perbualan_%s'],
    9.52 +'nl': [u'Overleg', u'Gebruiker', u'Overleg_gebruiker', u'Afbeelding', u'Overleg_afbeelding', u'MediaWiki', u'Overleg_MediaWiki', u'Sjabloon', u'Overleg_sjabloon', u'Help', u'Overleg_help', u'Categorie', u'Overleg_categorie', u'Speciaal', u'Media', u'Overleg_%s'],
    9.53 +'nn': [u'Diskusjon', u'Brukar', u'Brukardiskusjon', u'Fil', u'Fildiskusjon', u'MediaWiki', u'MediaWiki-diskusjon', u'Mal', u'Maldiskusjon', u'Hjelp', u'Hjelpdiskusjon', u'Kategori', u'Kategoridiskusjon', u'Spesial', u'Filpeikar', u'%s-diskusjon'],
    9.54 +'no': [u'Diskusjon', u'Bruker', u'Brukerdiskusjon', u'Bilde', u'Bildediskusjon', u'MediaWiki', u'MediaWiki-diskusjon', u'Mal', u'Maldiskusjon', u'Hjelp', u'Hjelpdiskusjon', u'Kategori', u'Kategoridiskusjon', u'Spesial', u'Medium', u'%s-diskusjon'],
    9.55 +'nv': [u"Naaltsoos_baa_yin\xedsht\\'\u012f\u0301", u"Choinish\\'\u012f\u012fh\xed", u"Choinish\\'\u012f\u012fh\xed_baa_yin\xedsht\\'\u012f\u0301", u"E\\'elyaa\xedg\xed\xed", u"E\\'elyaa\xedg\xed\xed_baa_yin\xedsht\\'\u012f\u0301", u'MediaWiki', u"MediaWiki_baa_yin\xedsht\\'\u012f\u0301", u'Template', u'Template_talk', u"An\xe1\\'\xe1lwo\\'", u"An\xe1\\'\xe1lwo\\'_baa_yin\xedsht\\'\u012f\u0301", u"T\\'\xe1\xe1\u0142\xe1h\xe1gi_\xe1t\\'\xe9ego", u"T\\'\xe1\xe1\u0142\xe1h\xe1gi_\xe1t\\'\xe9ego_baa_yin\xedsht\\'\u012f\u0301", u'Special', u'Media', u"%s_baa_yin\xedsht\\'\u012f\u0301"],
    9.56 +'oc': [u'Discutir', u'Utilisator', u'Discutida_Utilisator', u'Imatge', u'Discutida_Imatge', u'Media\xf2iqui', u'Discutida_Media\xf2iqui', u'Mod\xe8l', u'Discutida_Mod\xe8l', u'Ajuda', u'Discutida_Ajuda', u'Categoria', u'Discutida_Categoria', u'Especial', u'Media', u'Discutida_%s'],
    9.57 +'os': [u'\u0414\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u0410\u0440\u0445\u0430\u0439\xe6\u0433', u'\u0410\u0440\u0445\u0430\u0439\xe6\u0434\u0436\u044b_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u041d\u044b\u0432', u'\u041d\u044b\u0432\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'MediaWiki', u'\u0414\u0438\u0441\u043a\u0443\u0441\u0441\u0438_MediaWiki', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0428\u0430\u0431\u043b\u043e\u043d\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\xc6\u0445\u0445\u0443\u044b\u0441', u'\xc6\u0445\u0445\u0443\u044b\u0441\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0439\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u0421\xe6\u0440\u043c\u0430\u0433\u043e\u043d\u0434', u'Media\', //\u0447\u0442\u043e\u0431 \u043d\u0435 \u043f\u0438\u0441\u0430\u0442\u044c "\u041c\u0443\u043b\u044c\u0442\u0438\u043c\u0435\u0434\u0438\u044f', u'\u0414\u0438\u0441\u043a\u0443\u0441\u0441\u0438_%s'],
    9.58 +'pa': [u'\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a2e\u0a48\u0a02\u0a2c\u0a30', u'\u0a2e\u0a48\u0a02\u0a2c\u0a30_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a24\u0a38\u0a35\u0a40\u0a30', u'\u0a24\u0a38\u0a35\u0a40\u0a30_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a2e\u0a40\u0a21\u0a40\u0a06\u0a35\u0a3f\u0a15\u0a3f', u'\u0a2e\u0a40\u0a21\u0a40\u0a06\u0a35\u0a3f\u0a15\u0a3f_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a28\u0a2e\u0a42\u0a28\u0a3e', u'\u0a28\u0a2e\u0a42\u0a28\u0a3e_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a2e\u0a26\u0a26', u'\u0a2e\u0a26\u0a26_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a38\u0a3c\u0a4d\u0a30\u0a47\u0a23\u0a40', u'\u0a38\u0a3c\u0a4d\u0a30\u0a47\u0a23\u0a40_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a16\u0a3e\u0a38', u'\u0a2e\u0a40\u0a21\u0a40\u0a06', u'%s_\u0a1a\u0a30\u0a1a\u0a3e'],
    9.59 +'pl': [u'Dyskusja', u'U\u017cytkownik', u'Dyskusja_u\u017cytkownika', u'Grafika', u'Dyskusja_grafiki', u'MediaWiki', u'Dyskusja_MediaWiki', u'Szablon', u'Dyskusja_szablonu', u'Pomoc', u'Dyskusja_pomocy', u'Kategoria', u'Dyskusja_kategorii', u'Specjalna', u'Media', u'Dyskusja_%s'],
    9.60 +'pt': [u'Discuss\xe3o', u'Utilizador', u'Utilizador_Discuss\xe3o', u'Imagem', u'Imagem_Discuss\xe3o', u'MediaWiki', u'MediaWiki_Discuss\xe3o', u'Predefini\xe7\xe3o', u'Predefini\xe7\xe3o_Discuss\xe3o', u'Ajuda', u'Ajuda_Discuss\xe3o', u'Categoria', u'Categoria_Discuss\xe3o', u'Especial', u'Media', u'%s_Discuss\xe3o'],
    9.61 +'ro': [u'Discu\u0163ie', u'Utilizator', u'Discu\u0163ie_Utilizator', u'Imagine', u'Discu\u0163ie_Imagine', u'MediaWiki', u'Discu\u0163ie_MediaWiki', u'Format', u'Discu\u0163ie_Format', u'Ajutor', u'Discu\u0163ie_Ajutor', u'Categorie', u'Discu\u0163ie_Categorie', u'Special', u'Media', u'Discu\u0163ie_%s'],
    9.62 +'ru': [u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435', u'\u0423\u0447\u0430\u0441\u0442\u043d\u0438\u043a', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a\u0430', u'\u0418\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u0435', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0438\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u044f', u'MediaWiki', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_MediaWiki', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0448\u0430\u0431\u043b\u043e\u043d\u0430', u'\u0421\u043f\u0440\u0430\u0432\u043a\u0430', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0441\u043f\u0440\u0430\u0432\u043a\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0438', u'\u0421\u043b\u0443\u0436\u0435\u0431\u043d\u0430\u044f', u'\u041c\u0435\u0434\u0438\u0430', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_{{grammar:genitive|%s}}'],
    9.63 +'sk': [u'Diskusia', u'Redaktor', u'Diskusia_s_redaktorom', u'Obr\xe1zok', u'Diskusia_k_obr\xe1zku', u'MediaWiki', u'Diskusia_k_MediaWiki', u'\u0160abl\xf3na', u'Diskusia_k_\u0161abl\xf3ne', u'Pomoc', u'Diskusia_k_pomoci', u'Kateg\xf3ria', u'Diskusia_ku_kateg\xf3rii', u'\u0160peci\xe1lne', u'M\xe9di\xe1', u'Diskusia_k_{{grammar:dat\xedv|%s}}'],
    9.64 +'sl': [u'Pogovor', u'Uporabnik', u'Uporabni\u0161ki_pogovor', u'Slika', u'Pogovor_o_sliki', u'MediaWiki', u'Pogovor_o_MediaWiki', u'Predloga', u'Pogovor_o_predlogi', u'Pomo\u010d', u'Pogovor_o_pomo\u010di', u'Kategorija', u'Pogovor_o_kategoriji', u'Posebno', u'Media', u'Pogovor_{{grammar:mestnik|%s}}'],
    9.65 +'su': [u'Obrolan', u'Pamak\xe9', u'Obrolan_pamak\xe9', u'Gambar', u'Obrolan_gambar', u'MediaWiki', u'Obrolan_MediaWiki', u'Citakan', u'Obrolan_citakan', u'Pitulung', u'Obrolan_pitulung', u'Kategori', u'Obrolan_kategori', u'Husus', u'M\xe9dia', u'Obrolan_%s'],
    9.66 +'sv': [u'Diskussion', u'Anv\xe4ndare', u'Anv\xe4ndardiskussion', u'Bild', u'Bilddiskussion', u'MediaWiki', u'MediaWiki-diskussion', u'Mall', u'Malldiskussion', u'Hj\xe4lp', u'Hj\xe4lpdiskussion', u'Kategori', u'Kategoridiskussion', u'Special', u'Media', u'%sdiskussion'],
    9.67 +'ta': [u'\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0baa\u0baf\u0ba9\u0bb0\u0bcd', u'\u0baa\u0baf\u0ba9\u0bb0\u0bcd_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0baa\u0b9f\u0bbf\u0bae\u0bae\u0bcd', u'\u0baa\u0b9f\u0bbf\u0bae\u0baa\u0bcd_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0bae\u0bc0\u0b9f\u0bbf\u0baf\u0bbe\u0bb5\u0bbf\u0b95\u0bcd\u0b95\u0bbf', u'\u0bae\u0bc0\u0b9f\u0bbf\u0baf\u0bbe\u0bb5\u0bbf\u0b95\u0bcd\u0b95\u0bbf_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0bb5\u0bbe\u0bb0\u0bcd\u0baa\u0bcd\u0baa\u0bc1\u0bb0\u0bc1', u'\u0bb5\u0bbe\u0bb0\u0bcd\u0baa\u0bcd\u0baa\u0bc1\u0bb0\u0bc1_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0b89\u0ba4\u0bb5\u0bbf', u'\u0b89\u0ba4\u0bb5\u0bbf_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0baa\u0b95\u0bc1\u0baa\u0bcd\u0baa\u0bc1', u'\u0baa\u0b95\u0bc1\u0baa\u0bcd\u0baa\u0bc1_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0b9a\u0bbf\u0bb1\u0baa\u0bcd\u0baa\u0bc1', u'\u0b8a\u0b9f\u0b95\u0bae\u0bcd', u'%s_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1'],
    9.68 +'te': [u'\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c38\u0c2d\u0c4d\u0c2f\u0c41\u0c21\u0c41', u'\u0c38\u0c2d\u0c4d\u0c2f\u0c41\u0c28\u0c3f\u0c2a\u0c48_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2c\u0c4a\u0c2e\u0c4d\u0c2e', u'\u0c2c\u0c4a\u0c2e\u0c4d\u0c2e\u0c2a\u0c48_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2e\u0c40\u0c21\u0c3f\u0c2f\u0c3e\u0c35\u0c3f\u0c15\u0c40', u'\u0c2e\u0c40\u0c21\u0c3f\u0c2f\u0c3e\u0c35\u0c3f\u0c15\u0c40_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2e\u0c42\u0c38', u'\u0c2e\u0c42\u0c38_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c38\u0c39\u0c3e\u0c2f\u0c2e\u0c41', u'\u0c38\u0c39\u0c3e\u0c2f\u0c2e\u0c41_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c35\u0c30\u0c4d\u0c17\u0c02', u'\u0c35\u0c30\u0c4d\u0c17\u0c02_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2a\u0c4d\u0c30\u0c24\u0c4d\u0c2f\u0c47\u0c15', u'\u0c2e\u0c40\u0c21\u0c3f\u0c2f\u0c3e', u'%s_\u0c1a\u0c30\u0c4d\u0c1a'],
    9.69 +'tg': [u'\u0411\u0430\u04b3\u0441', u'\u041a\u043e\u0440\u0431\u0430\u0440', u'\u0411\u0430\u04b3\u0441\u0438_\u043a\u043e\u0440\u0431\u0430\u0440', u'\u0410\u043a\u0441', u'\u0411\u0430\u04b3\u0441\u0438_\u0430\u043a\u0441', u'\u041c\u0435\u0434\u0438\u0430\u0432\u0438\u043a\u0438', u'\u0411\u0430\u04b3\u0441\u0438_\u043c\u0435\u0434\u0438\u0430\u0432\u0438\u043a\u0438', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0411\u0430\u04b3\u0441\u0438_\u0448\u0430\u0431\u043b\u043e\u043d', u'\u0420\u043e\u04b3\u043d\u0430\u043c\u043e', u'\u0411\u0430\u04b3\u0441\u0438_\u0440\u043e\u04b3\u043d\u0430\u043c\u043e', u'\u0413\u0443\u0440\u04ef\u04b3', u'\u0411\u0430\u04b3\u0441\u0438_\u0433\u0443\u0440\u04ef\u04b3', u'\u0412\u0438\u0436\u0430', u'\u041c\u0435\u0434\u0438\u0430', u'\u0411\u0430\u04b3\u0441\u0438_%s'],
    9.70 +'th': [u'\u0e1e\u0e39\u0e14\u0e04\u0e38\u0e22', u'\u0e1c\u0e39\u0e49\u0e43\u0e0a\u0e49', u'\u0e04\u0e38\u0e22\u0e01\u0e31\u0e1a\u0e1c\u0e39\u0e49\u0e43\u0e0a\u0e49', u'\u0e20\u0e32\u0e1e', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e20\u0e32\u0e1e', u'\u0e21\u0e35\u0e40\u0e14\u0e35\u0e22\u0e27\u0e34\u0e01\u0e34', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e21\u0e35\u0e40\u0e14\u0e35\u0e22\u0e27\u0e34\u0e01\u0e34', u'\u0e41\u0e21\u0e48\u0e41\u0e1a\u0e1a', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e41\u0e21\u0e48\u0e41\u0e1a\u0e1a', u'\u0e27\u0e34\u0e18\u0e35\u0e43\u0e0a\u0e49', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e27\u0e34\u0e18\u0e35\u0e43\u0e0a\u0e49', u'\u0e2b\u0e21\u0e27\u0e14\u0e2b\u0e21\u0e39\u0e48', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e27\u0e14\u0e2b\u0e21\u0e39\u0e48', u'\u0e1e\u0e34\u0e40\u0e28\u0e29', u'\u0e2a\u0e37\u0e48\u0e2d', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07%s'],
    9.71 +'tr': [u'Tart\u0131\u015fma', u'Kullan\u0131c\u0131', u'Kullan\u0131c\u0131_mesaj', u'Resim', u'Resim_tart\u0131\u015fma', u'MedyaViki', u'MedyaViki_tart\u0131\u015fma', u'\u015eablon', u'\u015eablon_tart\u0131\u015fma', u'Yard\u0131m', u'Yard\u0131m_tart\u0131\u015fma', u'Kategori', u'Kategori_tart\u0131\u015fma', u'\xd6zel', u'Media', u'%s_tart\u0131\u015fma'],
    9.72 +'tt': [u'B\xe4x\xe4s', u'\xc4\u011fz\xe4', u'\xc4\u011fz\xe4_b\xe4x\xe4se', u'R\xe4sem', u'R\xe4sem_b\xe4x\xe4se', u'MediaWiki', u'MediaWiki_b\xe4x\xe4se', u'\xdcrn\xe4k', u'\xdcrn\xe4k_b\xe4x\xe4se', u'Y\xe4rd\xe4m', u'Y\xe4rd\xe4m_b\xe4x\xe4se', u'T\xf6rkem', u'T\xf6rkem_b\xe4x\xe4se', u'Maxsus', u'Media', u'%s_b\xe4x\xe4se'],
    9.73 +'uk': [u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f', u'\u041a\u043e\u0440\u0438\u0441\u0442\u0443\u0432\u0430\u0447', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u043a\u043e\u0440\u0438\u0441\u0442\u0443\u0432\u0430\u0447\u0430', u'\u0417\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u043d\u044f', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u043d\u044f', u'MediaWiki', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_MediaWiki', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u0448\u0430\u0431\u043b\u043e\u043d\u0443', u'\u0414\u043e\u0432\u0456\u0434\u043a\u0430', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u0434\u043e\u0432\u0456\u0434\u043a\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0456\u044f', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0456\u0457', u'\u0421\u043f\u0435\u0446\u0456\u0430\u043b\u044c\u043d\u0456', u'\u041c\u0435\u0434\u0456\u0430', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_%s'],
    9.74 +'ur': [u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644', u'\u0635\u0627\u0631\u0641', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0635\u0627\u0631\u0641', u'\u062a\u0635\u0648\u06cc\u0631', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u062a\u0635\u0648\u06cc\u0631', u'\u0645\u06cc\u0688\u06cc\u0627\u0648\u06a9\u06cc', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0645\u06cc\u0688\u06cc\u0627\u0648\u06a9\u06cc', u'\u0633\u0627\u0646\u0686\u06c1', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0633\u0627\u0646\u0686\u06c1', u'\u0645\u0639\u0627\u0648\u0646\u062a', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0645\u0639\u0627\u0648\u0646\u062a', u'\u0632\u0645\u0631\u06c1', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0632\u0645\u0631\u06c1', u'\u062e\u0627\u0635', u'\u0632\u0631\u06cc\u0639\u06c1', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_%s'],
    9.75 +'vi': [u'Th\u1ea3o_lu\u1eadn', u'Th\xe0nh_vi\xean', u'Th\u1ea3o_lu\u1eadn_Th\xe0nh_vi\xean', u'H\xecnh', u'Th\u1ea3o_lu\u1eadn_H\xecnh', u'MediaWiki', u'Th\u1ea3o_lu\u1eadn_MediaWiki', u'Ti\xeau_b\u1ea3n', u'Th\u1ea3o_lu\u1eadn_Ti\xeau_b\u1ea3n', u'Tr\u1ee3_gi\xfap', u'Th\u1ea3o_lu\u1eadn_Tr\u1ee3_gi\xfap', u'Th\u1ec3_lo\u1ea1i', u'Th\u1ea3o_lu\u1eadn_Th\u1ec3_lo\u1ea1i', u'\u0110\u1eb7c_bi\u1ec7t', u'Ph\u01b0\u01a1ng_ti\u1ec7n', u'Th\u1ea3o_lu\u1eadn_%s'],
    9.76 +'wa': [u'Copene', u'Uzeu', u'Uzeu_copene', u'Im\xe5dje', u'Im\xe5dje_copene', u'MediaWiki', u'MediaWiki_copene', u'Modele', u'Modele_copene', u'Aidance', u'Aidance_copene', u'Categoreye', u'Categoreye_copene', u'Sipeci\xe5s', u'Media', u'%s_copene'],
    9.77 +'yi': [u'\u05e8\u05e2\u05d3\u05df', u'\u05d1\u05d0\u05b7\u05e0\u05d9\u05e6\u05e2\u05e8', u'\u05d1\u05d0\u05b7\u05e0\u05d9\u05e6\u05e2\u05e8_\u05e8\u05e2\u05d3\u05df', u'\u05d1\u05d9\u05dc\u05d3', u'\u05d1\u05d9\u05dc\u05d3_\u05e8\u05e2\u05d3\u05df', u'\u05de\u05e2\u05d3\u05d9\u05e2\u05f0\u05d9\u05e7\u05d9', u'\u05de\u05e2\u05d3\u05d9\u05e2\u05f0\u05d9\u05e7\u05d9_\u05e8\u05e2\u05d3\u05df', u'\u05de\u05d5\u05e1\u05d8\u05e2\u05e8', u'\u05de\u05d5\u05e1\u05d8\u05e2\u05e8_\u05e8\u05e2\u05d3\u05df', u'\u05d4\u05d9\u05dc\u05e3', u'\u05d4\u05d9\u05dc\u05e3_\u05e8\u05e2\u05d3\u05df', u'\u05e7\u05d0\u05b7\u05d8\u05e2\u05d2\u05d0\u05b8\u05e8\u05d9\u05e2', u'\u05e7\u05d0\u05b7\u05d8\u05e2\u05d2\u05d0\u05b8\u05e8\u05d9\u05e2_\u05e8\u05e2\u05d3\u05df', u'\u05d1\u05d0\u05b7\u05d6\u05d5\u05e0\u05d3\u05e2\u05e8', u'\u05de\u05e2\u05d3\u05d9\u05e2', u'%s_\u05e8\u05e2\u05d3\u05df'],
    9.78 +}
    9.79 +
    10.1 --- a/mwlib/options.py	Thu Jul 03 17:11:35 2008 +0200
    10.2 +++ b/mwlib/options.py	Thu Jul 03 17:11:44 2008 +0200
    10.3 @@ -79,5 +79,6 @@
    10.4                      self.options.collectionpage,
    10.5                  ))
    10.6              self.metabook = metabook.parse_collection_page(wikitext)
    10.7 +            env.metabook = self.metabook
    10.8          return env
    10.9      
    11.1 --- a/mwlib/parser.py	Thu Jul 03 17:11:35 2008 +0200
    11.2 +++ b/mwlib/parser.py	Thu Jul 03 17:11:44 2008 +0200
    11.3 @@ -9,6 +9,8 @@
    11.4  
    11.5  from mwlib.scanner import tokenize, TagToken, EndTagToken
    11.6  from mwlib.log import Log
    11.7 +from mwlib.namespace import namespace_maps, interwiki_map
    11.8 +from mwlib.lang import languages
    11.9  
   11.10  log = Log("parser")
   11.11  
   11.12 @@ -193,82 +195,165 @@
   11.13  
   11.14  class Link(Node):
   11.15      target = None
   11.16 -    specialPrefixes = set(["wikipedia", "wiktionary", "wikibooks", "wikisource",
   11.17 -                           "wikiquote", "meta", "talk",
   11.18 -                           "commons", "wikinews", "template", "wikitravel", "help", "vorlage"])
   11.19 -    from mwlib.lang import languages
   11.20 +    from mwlib.namespace import NS_MAIN, NS_CATEGORY, NS_IMAGE
   11.21 +
   11.22      colon = False
   11.23  
   11.24      def hasContent(self):
   11.25          if self.target:
   11.26              return True
   11.27          return False
   11.28 +
   11.29 +    @classmethod
   11.30 +    def _buildSpecializeMap(cls, namespaces, interwikis, langs):
   11.31 +        """
   11.32 +        Returns a dict mapping namespace prefixes to a tuple of form
   11.33 +        (link_class, namespace_value).
   11.34 +        """
   11.35 +        res = {}
   11.36 +        for name, num in namespaces.iteritems():
   11.37 +            name = name.lower()
   11.38 +            if num == cls.NS_CATEGORY:
   11.39 +                res[name] = (CategoryLink, num)
   11.40 +            elif num == cls.NS_IMAGE:
   11.41 +                res[name] = (ImageLink, num)
   11.42 +            else:
   11.43 +                res[name] = (NamespaceLink, num)
   11.44 +
   11.45 +        for name, target in interwikis.iteritems():
   11.46 +            res[name.lower()] = (InterwikiLink, target)
   11.47 +
   11.48 +        for lang in langs:
   11.49 +            res[lang.lower()] = (LangLink, lang)
   11.50 +
   11.51 +        return res
   11.52          
   11.53 +    @classmethod
   11.54 +    def _setSpecializeMap(cls, nsMap='default'):
   11.55 +        cls._specializeMap = cls._buildSpecializeMap(
   11.56 +            namespace_maps[nsMap], interwiki_map, languages)
   11.57 +
   11.58      def _specialize(self):
   11.59 +        """
   11.60 +        Handles different forms of link, e.g.:
   11.61 +            - [[Foo]]
   11.62 +            - [[Foo|Bar]]
   11.63 +            - [[Category:Foo]]
   11.64 +            - [[:Category:Foo]]
   11.65 +        """
   11.66 +
   11.67          if not self.children:
   11.68              return
   11.69  
   11.70          if type(self.children[0]) != Text:
   11.71              return
   11.72              
   11.73 -        self.target = target = self.children[0].caption.strip()
   11.74 +        # Handle [[Foo|Bar]]
   11.75 +        full_target = self.children[0].caption.strip()
   11.76          del self.children[0]
   11.77          if self.children and self.children[0] == Control("|"):
   11.78              del self.children[0]
   11.79 +
   11.80 +        # Mark [[:Category:Foo]]. See below
   11.81 +        if full_target.startswith(':'):
   11.82 +            self.colon = True
   11.83 +            full_target = full_target[1:]
   11.84 +        self.full_target = full_target
   11.85          
   11.86 -        pic = self.target
   11.87 -        if pic.startswith(':'):
   11.88 -            self.colon = True
   11.89 -            
   11.90 -        
   11.91 -        
   11.92 -        # pic == "Bild:Wappen_von_Budenheim.png"
   11.93 -        
   11.94 -        pic = pic.strip(': ')
   11.95 -        if ':' not in pic:
   11.96 -            return
   11.97 -            
   11.98 -        linktype, pic = pic.split(':', 1)
   11.99 -        linktype = linktype.lower().strip(" :")
  11.100 -        
  11.101 -        if linktype in ("category", "kategorie"):
  11.102 -            self.__class__ = CategoryLink
  11.103 -            self.target = pic.strip()
  11.104 +        try:
  11.105 +            ns, title = full_target.split(':', 1)
  11.106 +        except ValueError:
  11.107 +            self.namespace = self.NS_MAIN
  11.108 +            self.target = full_target
  11.109 +            self.__class__ = ArticleLink
  11.110              return
  11.111  
  11.112 -        if linktype in self.specialPrefixes:
  11.113 -            self.__class__ = SpecialLink
  11.114 -            self.target = pic.strip()
  11.115 -            self.ns = linktype            
  11.116 +        (self.__class__, self.namespace) = (
  11.117 +                self._specializeMap.get(ns.lower(), (ArticleLink, self.NS_MAIN)))
  11.118  
  11.119 +        if len(ns) == 2:
  11.120 +            # Assume this is an unlisted language
  11.121 +            self.__class__ = LangLink
  11.122 +            self.namespace = ns.lower()
  11.123 +
  11.124 +        if self.colon and self.namespace != self.NS_MAIN:
  11.125 +            # [[:Category:Foo]] should not be a category link
  11.126 +            self.__class__ = NamespaceLink
  11.127 +
  11.128 +        if self.namespace == self.NS_MAIN:
  11.129 +            # e.g. [[Blah: Foo]] is an ordinary article with a colon
  11.130 +            self.target = full_target
  11.131 +        else:
  11.132 +            self.target = title
  11.133 +
  11.134 +        if self.__class__ == ImageLink:
  11.135 +            # Handle images. First ensure they are syntactically sound.
  11.136 +
  11.137 +            try:
  11.138 +                prefix, suffix = title.rsplit('.', 1)
  11.139 +                if suffix.lower() in ['jpg', 'jpeg', 'gif', 'png', 'svg']:
  11.140 +                    self._readArgs() # calls Image._readArgs()
  11.141 +                    return
  11.142 +            except ValueError:
  11.143 +                pass
  11.144 +            # We can't handle this as an image, so default:
  11.145 +            self.__class__ = NamespaceLink 
  11.146 +    
  11.147 +
  11.148 +    capitalizeTarget = False # Wiki-dependent setting, e.g. Wikipedia => True
  11.149 +
  11.150 +    _SPACE_RE = re.compile('[_\s]+')
  11.151 +    def _normalizeTarget(self):
  11.152 +        """
  11.153 +        Normalizes the format of the target with regard to whitespace and
  11.154 +        capitalization (depending on capitalizeTarget setting).
  11.155 +        """
  11.156 +
  11.157 +        if not self.target:
  11.158              return
  11.159  
  11.160 -        if linktype in self.languages:
  11.161 -            self.__class__ = LangLink
  11.162 -            return
  11.163 -            
  11.164 -        
  11.165 -        if linktype not in ("bild", "image", "imagen"):
  11.166 -            # assume a LangLink
  11.167 -            log.info("Unknown linktype:", repr(linktype))
  11.168 -            if len(linktype)==2:
  11.169 -                self.__class__ = LangLink
  11.170 -            return
  11.171 -        
  11.172 -        
  11.173 -        # pic == "Wappen_von_Budenheim.png"
  11.174 -        
  11.175 -        try:
  11.176 -            prefix, suffix = pic.rsplit('.', 1)
  11.177 -        except ValueError:
  11.178 -            return
  11.179 +        # Strictly, the target should be passed through urllib.unquote() first,
  11.180 +        # but percent-encoded targets may be rare enough in practice to ignore.
  11.181  
  11.182 -        if suffix.lower() in ['jpg', 'jpeg', 'gif', 'png', 'svg']:
  11.183 -            self.__class__ = ImageLink
  11.184 -            self.target = pic.strip()
  11.185 +        # [[__init__]] -> [[init]]
  11.186 +        self.target = self._SPACE_RE.sub(' ', self.target).strip()
  11.187 +        if self.capitalizeTarget:
  11.188 +            self.target = self.target[:1].upper() + self.target[1:]
  11.189  
  11.190  
  11.191 +# Link forms:
  11.192  
  11.193 +class ArticleLink(Link):
  11.194 +    pass
  11.195 +
  11.196 +class SpecialLink(Link):
  11.197 +    pass
  11.198 +
  11.199 +class NamespaceLink(SpecialLink):
  11.200 +    pass
  11.201 +
  11.202 +class InterwikiLink(SpecialLink):
  11.203 +    pass
  11.204 +
  11.205 +# Non-links with same syntax:
  11.206 +
  11.207 +class LangLink(Link):
  11.208 +    pass
  11.209 +
  11.210 +class CategoryLink(Link):
  11.211 +    pass
  11.212 +
  11.213 +class ImageLink(Link):
  11.214 +    target = None
  11.215 +    width = None
  11.216 +    height = None
  11.217 +    align = ''
  11.218 +    thumb = False
  11.219 +    
  11.220 +    def isInline(self):
  11.221 +        return not bool(self.align or self.thumb)
  11.222 +
  11.223 +    def _readArgs(self):
  11.224          idx = 0
  11.225          last = []
  11.226          
  11.227 @@ -328,25 +413,8 @@
  11.228          
  11.229          if not self.children:
  11.230              self.children = last
  11.231 -            
  11.232 -class ImageLink(Link):
  11.233 -    target = None
  11.234 -    width = None
  11.235 -    height = None
  11.236 -    align = ''
  11.237 -    thumb = False
  11.238 -    
  11.239 -    def isInline(self):
  11.240 -        return not bool(self.align or self.thumb)
  11.241 -    
  11.242 -class LangLink(Link):
  11.243 -    pass
  11.244  
  11.245 -class CategoryLink(Link):
  11.246 -    pass
  11.247 -
  11.248 -class SpecialLink(Link):
  11.249 -    pass
  11.250 +Link._setSpecializeMap('default') # initialise the Link class
  11.251  
  11.252              
  11.253  class Text(Node):
  11.254 @@ -365,10 +433,10 @@
  11.255  class Control(Text):
  11.256      pass
  11.257  
  11.258 -def _parseAtomFromString(s):
  11.259 +def _parseAtomFromString(s, lang=None):
  11.260      from mwlib import scanner
  11.261      tokens = scanner.tokenize(s)
  11.262 -    p=Parser(tokens)
  11.263 +    p=Parser(tokens, lang=lang)
  11.264      try:
  11.265          return p.parseAtom()
  11.266      except Exception, err:
  11.267 @@ -377,10 +445,10 @@
  11.268  
  11.269                    
  11.270      
  11.271 -def parse_fields_in_imagemap(imap):
  11.272 +def parse_fields_in_imagemap(imap, lang=None):
  11.273      
  11.274      if imap.image:
  11.275 -        imap.imagelink = _parseAtomFromString(u'[['+imap.image+']]')
  11.276 +        imap.imagelink = _parseAtomFromString(u'[['+imap.image+']]', lang=lang)
  11.277          if not isinstance(imap.imagelink, ImageLink):
  11.278              imap.imagelink = None
  11.279  
  11.280 @@ -397,13 +465,22 @@
  11.281  _ALPHA_RE = re.compile(r'[^\W\d_]+', re.UNICODE) # Matches alpha strings
  11.282              
  11.283  class Parser(object):
  11.284 -    def __init__(self, tokens, name=''):
  11.285 +    def __init__(self, tokens, name='', lang=None):
  11.286          self.tokens = tokens
  11.287 +        self.lang = lang
  11.288          self.pos = 0
  11.289          self.name = name
  11.290          self.lastpos = 0
  11.291          self.count = 0
  11.292 -
  11.293 +        
  11.294 +        if lang:
  11.295 +            nsMap = '%s+en_mw' % lang
  11.296 +            if nsMap not in namespace_maps:
  11.297 +                nsMap = 'default'
  11.298 +        else:
  11.299 +            nsMap = 'default'
  11.300 +        Link._setSpecializeMap(nsMap)
  11.301 +        
  11.302          from mwlib import tagext
  11.303          self.tagextensions = tagext.default_registry
  11.304          
  11.305 @@ -548,7 +625,7 @@
  11.306  
  11.307          if not obj.children and obj.target:
  11.308              # [[a]] -> [[a|a]]
  11.309 -            obj.append(Text(obj.target))
  11.310 +            obj.append(Text(obj.full_target))
  11.311  
  11.312          if isinstance(obj, ImageLink):
  11.313              return obj
  11.314 @@ -559,6 +636,8 @@
  11.315                  # [[a|a]]b -> [[a|ab]]
  11.316                  obj.append(Text(m.group(0)), True)
  11.317                  self.tokens[self.pos] = ('TEXT', self.token[1][m.end():])
  11.318 +
  11.319 +        obj._normalizeTarget()
  11.320              
  11.321          return obj
  11.322      
  11.323 @@ -668,7 +747,7 @@
  11.324                  continue
  11.325  
  11.326              # either image link or text inside
  11.327 -            n=_parseAtomFromString(u'[['+x+']]')
  11.328 +            n=_parseAtomFromString(u'[['+x+']]', lang=self.lang)
  11.329  
  11.330              if isinstance(n, ImageLink):
  11.331                  children.append(n)
  11.332 @@ -684,7 +763,7 @@
  11.333          txt = "".join(x.caption for x in node.find(Text))
  11.334          from mwlib import imgmap
  11.335          node.imagemap = imgmap.ImageMapFromString(txt)
  11.336 -        parse_fields_in_imagemap(node.imagemap)
  11.337 +        parse_fields_in_imagemap(node.imagemap, lang=self.lang)
  11.338  
  11.339          #print node.imagemap
  11.340          return node
    12.1 --- a/mwlib/uparser.py	Thu Jul 03 17:11:35 2008 +0200
    12.2 +++ b/mwlib/uparser.py	Thu Jul 03 17:11:44 2008 +0200
    12.3 @@ -76,7 +76,7 @@
    12.4  
    12.5  postprocessors = [removeBoilerplate, simplify, fixlitags]
    12.6  
    12.7 -def parseString(title=None, raw=None, wikidb=None, revision=None):
    12.8 +def parseString(title=None, raw=None, wikidb=None, revision=None, lang=None):
    12.9      """parse article with title from raw mediawiki text"""
   12.10      assert title is not None 
   12.11  
   12.12 @@ -86,12 +86,16 @@
   12.13      if wikidb:
   12.14          te = expander.Expander(raw, pagename=title, wikidb=wikidb)
   12.15          input = te.expandTemplates()
   12.16 +        if lang is None and hasattr(wikidb, 'getSource'):
   12.17 +            src = wikidb.getSource()
   12.18 +            if src:
   12.19 +                lang = src.get('language')
   12.20      else:
   12.21          input = raw
   12.22 -
   12.23 +    
   12.24      tokens = scanner.tokenize(input, title)
   12.25  
   12.26 -    a = parser.Parser(tokens, title).parse()
   12.27 +    a = parser.Parser(tokens, title, lang=lang).parse()
   12.28      a.caption = title
   12.29      for x in postprocessors:
   12.30          x(a)
    13.1 --- a/mwlib/wiki.py	Thu Jul 03 17:11:35 2008 +0200
    13.2 +++ b/mwlib/wiki.py	Thu Jul 03 17:11:44 2008 +0200
    13.3 @@ -135,8 +135,8 @@
    13.4      def get_source(self):
    13.5          if 'source' in self.metabook:
    13.6              return self.metabook['source']
    13.7 -        if hasattr(self.wiki, 'getMetaData'):
    13.8 -            return self.wiki.getMetaData()
    13.9 +        if hasattr(self.wiki, 'getSource'):
   13.10 +            return self.wiki.getSource()
   13.11          return metabook.make_source(
   13.12              name=self.configparser.get('wiki', 'name'),
   13.13              url=self.configparser.get('wiki', 'url'),
    14.1 --- a/mwlib/zipwiki.py	Thu Jul 03 17:11:35 2008 +0200
    14.2 +++ b/mwlib/zipwiki.py	Thu Jul 03 17:11:44 2008 +0200
    14.3 @@ -36,6 +36,9 @@
    14.4              pass
    14.5          return None
    14.6      
    14.7 +    def getSource(self):
    14.8 +        return self.metabook.get('source')
    14.9 +    
   14.10      def getRawArticle(self, title, revision=None):
   14.11          article = self._getArticle(title, revision=revision)
   14.12          if article:
    15.1 --- a/tests/test_parser.py	Thu Jul 03 17:11:35 2008 +0200
    15.2 +++ b/tests/test_parser.py	Thu Jul 03 17:11:44 2008 +0200
    15.3 @@ -610,3 +610,59 @@
    15.4      assert u'<nosuchtag>' in txt, 'opening tag missing in asText()'
    15.5      assert u'</nosuchtag>' in txt, 'closing tag missing in asText()'
    15.6      
    15.7 +# Test varieties of link
    15.8 +
    15.9 +def test_plain_link():
   15.10 +    r=parse("[[bla]]").find(parser.ArticleLink)[0]
   15.11 +    assert r.target=='bla'
   15.12 +    assert r.children[0].caption == 'bla'
   15.13 +
   15.14 +def test_piped_link():
   15.15 +    r=parse("[[bla|blubb]]").find(parser.ArticleLink)[0]
   15.16 +    assert r.target=='bla'
   15.17 +    assert r.children[0].caption == 'blubb'
   15.18 +
   15.19 +def test_category_link():
   15.20 +    r=parse("[[category:bla]]").find(parser.CategoryLink)[0]
   15.21 +    assert r.target=='bla'
   15.22 +    assert r.namespace == 14
   15.23 +
   15.24 +def test_category_colon_link():
   15.25 +    r=parse("[[:category:bla]]").find(parser.SpecialLink)[0]
   15.26 +    assert r.target=='bla'
   15.27 +    assert r.namespace == 14
   15.28 +    assert not isinstance(r, parser.CategoryLink)
   15.29 +
   15.30 +def test_image_colon_link():
   15.31 +    r=parse("[[:image:bla.jpg]]").find(parser.SpecialLink)[0]
   15.32 +    assert r.target=='bla.jpg'
   15.33 +    assert r.namespace == 6
   15.34 +    assert not isinstance(r, parser.ImageLink)
   15.35 +
   15.36 +def test_interwiki_link():
   15.37 +    r=parse("[[wict:bla]]").find(parser.SpecialLink)[0]
   15.38 +    assert r.target=='bla'
   15.39 +    assert r.namespace == 'wiktionary'
   15.40 +
   15.41 +def test_language_link():
   15.42 +    r=parse("[[es:bla]]").find(parser.LangLink)[0]
   15.43 +    assert r.target=='bla'
   15.44 +    assert r.namespace == 'es'
   15.45 +
   15.46 +def test_long_language_link():
   15.47 +    r=parse("[[csb:bla]]").find(parser.LangLink)[0]
   15.48 +    assert r.target=='bla'
   15.49 +    assert r.namespace == 'csb'
   15.50 +
   15.51 +def test_normalize():
   15.52 +    r=parse("[[MediaWiki:__bla_ _]]").find(parser.LangLink)[0]
   15.53 +    assert r.target=='bla'
   15.54 +    assert r.namespace == 8
   15.55 +
   15.56 +def test_normalize_with_caps():
   15.57 +    parser.Link.capitalizeTarget = True
   15.58 +    r=parse("[[MediaWiki:__bla_ _ ]]").find(parser.LangLink)[0]
   15.59 +    parser.Link.capitalizeTarget = False
   15.60 +    assert r.target=='Bla'
   15.61 +    assert r.namespace == 8
   15.62 +    assert r.children[0].caption == 'MediaWiki:__bla_ _'