1.1 --- a/MANIFEST.in Thu Jul 03 17:11:35 2008 +0200
1.2 +++ b/MANIFEST.in Thu Jul 03 17:11:44 2008 +0200
1.3 @@ -63,6 +63,7 @@
1.4 include mwlib/serve.py
1.5 include mwlib/snippets.py
1.6 include mwlib/snippets.txt
1.7 +include mwlib/tagext.py
1.8 include mwlib/texmap.py
1.9 include mwlib/timeline.py
1.10 include mwlib/uparser.py
1.11 @@ -97,6 +98,7 @@
1.12 include tests/test_sanitychecker.py
1.13 include tests/test_scanner.py
1.14 include tests/test_table.py
1.15 +include tests/test_tagext.py
1.16 include tests/test_timeline.py
1.17 include tests/test_utils.py
1.18 include tests/test_xhtmlwriter.py
2.1 --- a/docs/metabook.txt Thu Jul 03 17:11:35 2008 +0200
2.2 +++ b/docs/metabook.txt Thu Jul 03 17:11:44 2008 +0200
2.3 @@ -74,6 +74,10 @@
2.4
2.5 Unique name of source, e.g. "Wikipedia (en)"
2.6
2.7 +language (string)
2.8 +
2.9 + Two-letter ISO 639-1 language code, e.g. "en"
2.10 +
2.11
2.12 License
2.13 -------
3.1 --- a/mwlib/cdb.py Thu Jul 03 17:11:35 2008 +0200
3.2 +++ b/mwlib/cdb.py Thu Jul 03 17:11:44 2008 +0200
3.3 @@ -48,7 +48,7 @@
3.4 def close(self):
3.5 self.map.close()
3.6
3.7 - def __iter__(self, fn=None):
3.8 + def __iter__(self):
3.9 len = 2048
3.10 while len < self.eod:
3.11 klen, vlen = struct.unpack("<LL", self.map[len:len+8])
3.12 @@ -57,37 +57,25 @@
3.13 len += klen
3.14 val = self.map[len:len+vlen]
3.15 len += vlen
3.16 - if fn:
3.17 - yield fn(key, val)
3.18 - else:
3.19 - yield (key, val)
3.20 + yield (key, val)
3.21
3.22 def iteritems(self):
3.23 return self.__iter__()
3.24
3.25 def iterkeys(self):
3.26 - return self.__iter__(lambda k,v: k)
3.27 + return (k for k, v in self)
3.28
3.29 def itervalues(self):
3.30 - return self.__iter__(lambda k,v: v)
3.31 + return (v for k, v in self)
3.32
3.33 def items(self):
3.34 - ret = []
3.35 - for i in self.iteritems():
3.36 - ret.append(i)
3.37 - return ret
3.38 + return list(self.iteritems())
3.39
3.40 def keys(self):
3.41 - ret = []
3.42 - for i in self.iterkeys():
3.43 - ret.append(i)
3.44 - return ret
3.45 + return list(self.iterkeys())
3.46
3.47 def values(self):
3.48 - ret = []
3.49 - for i in self.itervalues():
3.50 - ret.append(i)
3.51 - return ret
3.52 + return list(self.itervalues())
3.53
3.54 def findstart(self):
3.55 self.loop = 0
4.1 --- a/mwlib/cdbwiki.py Thu Jul 03 17:11:35 2008 +0200
4.2 +++ b/mwlib/cdbwiki.py Thu Jul 03 17:11:44 2008 +0200
4.3 @@ -8,206 +8,129 @@
4.4 import zlib
4.5 import re
4.6
4.7 -from mwlib import cdb
4.8 -
4.9 -try:
4.10 - from xml.etree import cElementTree
4.11 -except ImportError:
4.12 - import cElementTree
4.13 -
4.14 -ns = '{http://www.mediawiki.org/xml/export-0.3/}'
4.15 -
4.16 -wikiindex = "wikiidx"
4.17 -wikidata = "wikidata.bin"
4.18 -
4.19 -
4.20 +from mwlib import cdb, dumpparser
4.21
4.22 def normname(name):
4.23 name = name.strip().replace("_", " ")
4.24 name = name[:1].upper()+name[1:]
4.25 return name
4.26
4.27 -class Tags:
4.28 - page = ns + 'page'
4.29 +class ZCdbWriter(cdb.CdbMake):
4.30 + def __init__(self, indexpath, datapath=None):
4.31 + if not datapath:
4.32 + datapath = indexpath + 'data.bin'
4.33 + indexpath = indexpath + 'idx.cdb'
4.34
4.35 - # <title> inside <page>
4.36 - title = ns + 'title'
4.37 + cdb.CdbMake.__init__(self, open(indexpath, 'wb'))
4.38 + self.data = open(datapath, 'wb')
4.39
4.40 - # <revision> inside <page>
4.41 - revision = ns + 'revision'
4.42 + def add(self, key, val):
4.43 + key = key.encode("utf-8")
4.44 + val = zlib.compress(val.encode('utf-8')) # NOTE: encode wasn't in original
4.45 + pos = self.data.tell()
4.46 + self.data.write(val)
4.47 + cdb.CdbMake.add(self, key, "%s %s" % (pos, len(val)))
4.48
4.49 - # <id> inside <revision>
4.50 - revid = ns + 'id'
4.51 + def finish(self):
4.52 + cdb.CdbMake.finish(self)
4.53 + self.data.close()
4.54
4.55 - # <contributor><username> inside <revision>
4.56 - username = ns + 'contributor/' + ns + 'username'
4.57
4.58 - # <text> inside <revision>
4.59 - text = ns + 'text'
4.60 +class ZCdbReader(cdb.Cdb):
4.61 + def __init__(self, indexpath, datapath=None):
4.62 + if not datapath:
4.63 + datapath = indexpath + 'data.bin'
4.64 + indexpath = indexpath + 'idx.cdb'
4.65
4.66 - # <timestamp> inside <revision>
4.67 - timestamp = ns + 'timestamp'
4.68 + cdb.Cdb.__init__(self, open(indexpath, 'rb'))
4.69 + self.datapath = datapath
4.70
4.71 - # <revision><text> inside <page>
4.72 - revision_text = ns + 'revision/' + ns + 'text'
4.73 + def __getitem__(self, key):
4.74 + key = key.encode("utf-8")
4.75 + data = cdb.Cdb.__getitem__(self, key) # may raise KeyError
4.76 + return self._readz(data)
4.77
4.78 - siteinfo = ns + "siteinfo"
4.79 + def _readz(self, data):
4.80 + pos, len = map(int, data.split())
4.81 +
4.82 + f=open(self.datapath, "rb")
4.83 + f.seek(pos)
4.84 + d=f.read(len)
4.85 + f.close()
4.86 + return zlib.decompress(d).decode('utf-8')
4.87
4.88 -class DumpParser(object):
4.89 - category_ns = set(['category', 'kategorie'])
4.90 - image_ns = set(['image', 'bild'])
4.91 - template_ns = set(['template', 'vorlage'])
4.92 - wikipedia_ns = set(['wikipedia'])
4.93 + def iterkeys(self):
4.94 + return (k.decode('utf-8') for k in cdb.Cdb.iterkeys(self))
4.95
4.96 - tags = Tags()
4.97 + def iteritems(self):
4.98 + return ((k.decode('utf-8'), self._readz(v))
4.99 + for k,v in cdb.Cdb.iteritems(self))
4.100
4.101 + def itervalues(self):
4.102 + return (self._readz(v) for v in cdb.Cdb.itervalues(self))
4.103
4.104 - def __init__(self, xmlfilename):
4.105 - self.xmlfilename = xmlfilename
4.106
4.107 - def _write(self, msg):
4.108 - sys.stdout.write(msg)
4.109 - sys.stdout.flush()
4.110 -
4.111 - def openInputStream(self):
4.112 - if self.xmlfilename.lower().endswith(".bz2"):
4.113 - f = os.popen("bunzip2 -c %s" % self.xmlfilename, "r")
4.114 - elif self.xmlfilename.lower().endswith(".7z"):
4.115 - f = os.popen("7z -so x %s" % self.xmlfilename, "r")
4.116 +class BuildWiki():
4.117 + def __init__(self, dumpfile, outputdir, prefix='wiki'):
4.118 + if type(dumpfile) in (type(''), type(u'')):
4.119 + self.dumpParser = dumpparser.DumpParser(dumpfile)
4.120 else:
4.121 - f = open(self.xmlfilename, "r")
4.122 -
4.123 - return f
4.124 -
4.125 - def __call__(self):
4.126 - f = self.openInputStream()
4.127 -
4.128 - count = 0
4.129 - for event, elem in cElementTree.iterparse(f):
4.130 - if elem.tag != self.tags.page:
4.131 - continue
4.132 - self.handlePageElement(elem)
4.133 - elem.clear()
4.134 - count += 1
4.135 -
4.136 - if count % 5000 == 0:
4.137 - self._write(" %s\n" % count)
4.138 - elif count % 100 == 0:
4.139 - self._write(".")
4.140 -
4.141 -
4.142 - def handlePageElement(self, page):
4.143 - title = page.find(self.tags.title).text
4.144 - revisions = page.findall(self.tags.revision)
4.145 - if not revisions:
4.146 - return
4.147 - revision = revisions[-1]
4.148 -
4.149 - texttag = revision.find(self.tags.text)
4.150 - timestamptag = revision.find(self.tags.timestamp)
4.151 - revision.clear()
4.152 -
4.153 - if texttag is not None:
4.154 - text = texttag.text
4.155 - texttag.clear()
4.156 - else:
4.157 - text = None
4.158 -
4.159 - if timestamptag is not None:
4.160 - timestamp = timestamptag.text
4.161 - timestamptag.clear()
4.162 - else:
4.163 - timestamp = None
4.164 -
4.165 - if not text:
4.166 - return
4.167 -
4.168 - if isinstance(title, str):
4.169 - title = unicode(title)
4.170 - if isinstance(text, str):
4.171 - text = unicode(text)
4.172 -
4.173 -
4.174 - if ':' in title:
4.175 - ns, rest = title.split(':', 1)
4.176 - ns = ns.lower()
4.177 - if ns not in self.template_ns:
4.178 - return
4.179 - self.handleTemplate(rest, text, timestamp)
4.180 - else:
4.181 - self.handleArticle(title, text, timestamp)
4.182 -
4.183 - def handleArticle(self, title, text, timestamp):
4.184 - print "ART:", repr(title), len(text), timestamp
4.185 -
4.186 - def handleTemplate(self, title, text, timestamp):
4.187 - print "TEMPL:", repr(title), len(text), timestamp
4.188 -
4.189 -class BuildWiki(DumpParser):
4.190 - def __init__(self, xmlfilename, outputdir):
4.191 - DumpParser.__init__(self, xmlfilename)
4.192 + self.dumpParser = dumpfile
4.193 + self.output_path = os.path.join(outputdir, prefix)
4.194 self.outputdir = outputdir
4.195
4.196 def __call__(self):
4.197 if not os.path.exists(self.outputdir):
4.198 os.makedirs(self.outputdir)
4.199
4.200 - n = os.path.join(self.outputdir, wikiindex)
4.201 - out = open(os.path.join(self.outputdir, wikidata), "wb")
4.202 - self.out = out
4.203 - f = open(n+'.cdb', 'wb')
4.204 - c = cdb.CdbMake(f)
4.205 - self.cdb = c
4.206 + self.writer = ZCdbWriter(self.output_path)
4.207
4.208 - DumpParser.__call__(self)
4.209 - c.finish()
4.210 - f.close()
4.211 + count = 0
4.212 + for page in self.dumpParser:
4.213 + if page.namespace == dumpparser.NS_MAIN:
4.214 + self.handleArticle(page.title, page.text, page.timestamp)
4.215 + elif page.namespace == dumpparser.NS_TEMPLATE:
4.216 + self.handleTemplate(page.title, page.text, page.timestamp)
4.217 + else:
4.218 + self.handleOther(page.title, page.text, page.timestamp)
4.219
4.220 + count += 1
4.221 + if count % 5000 == 0:
4.222 + self._write(" %s\n" % count)
4.223 + elif count % 100 == 0:
4.224 + self._write(".")
4.225 +
4.226 + self.writer.finish()
4.227
4.228 - def _writeobj(self, key, val):
4.229 - key = key.encode("utf-8")
4.230 - val = zlib.compress(val)
4.231 - pos = self.out.tell()
4.232 - self.out.write(val)
4.233 - self.cdb.add(key, "%s %s" % (pos, len(val)))
4.234 + def _write(self, msg):
4.235 + sys.stdout.write(msg)
4.236 + sys.stdout.flush()
4.237
4.238 def handleArticle(self, title, text, timestamp):
4.239 - self._writeobj(u":"+title, text.encode("utf-8"))
4.240 + self.writer.add(u":"+title, text)
4.241
4.242 def handleTemplate(self, title, text, timestamp):
4.243 - self._writeobj(u"T:"+title, text.encode("utf-8"))
4.244 + self.writer.add(u"T:"+title, text)
4.245 +
4.246 + def handleOther(self, title, text, timestamp):
4.247 + self.writer.add(title, text)
4.248
4.249
4.250
4.251 class WikiDB(object):
4.252 redirect_rex = re.compile(r'^#Redirect:?\s*?\[\[(?P<redirect>.*?)\]\]', re.IGNORECASE)
4.253
4.254 - def __init__(self, dir):
4.255 + def __init__(self, dir, prefix='wiki'):
4.256 self.dir = dir
4.257 - self.obj2pos_path = os.path.join(self.dir, wikidata)
4.258 - self.cdb = cdb.Cdb(open(os.path.join(self.dir, wikiindex+'.cdb'), 'rb'))
4.259 -
4.260 - def _readobj(self, key):
4.261 - key = key.encode("utf-8")
4.262 -
4.263 - try:
4.264 - data = self.cdb[key]
4.265 - except KeyError:
4.266 - return None
4.267 -
4.268 - pos, len = map(int, data.split())
4.269 -
4.270 - f=open(self.obj2pos_path, "rb")
4.271 - f.seek(pos)
4.272 - d=f.read(len)
4.273 - f.close()
4.274 - return zlib.decompress(d)
4.275 + self.reader = ZCdbReader(os.path.join(self.dir, prefix))
4.276
4.277 def getRawArticle(self, title, raw=None, revision=None):
4.278 title = normname(title)
4.279 - res = self._readobj(":"+title)
4.280 - if res is None:
4.281 - return None
4.282 + print repr(title)
4.283 + try:
4.284 + res = self.reader[":"+title]
4.285 + except KeyError:
4.286 + return None
4.287
4.288 res = unicode(res, 'utf-8')
4.289 mo = self.redirect_rex.search(res)
4.290 @@ -224,9 +147,10 @@
4.291 title = title.split(':', 1)[1]
4.292
4.293 title = normname(title)
4.294 - res = unicode(self._readobj(u"T:"+title) or "", 'utf-8')
4.295 - if not res:
4.296 - return res
4.297 + try:
4.298 + res = self.reader["T:"+title]
4.299 + except KeyError:
4.300 + return ''
4.301
4.302 mo = self.redirect_rex.search(res)
4.303 if mo:
4.304 @@ -237,7 +161,12 @@
4.305
4.306
4.307 def articles(self):
4.308 - for k, v in self.cdb:
4.309 - if k[0]==':':
4.310 - k = unicode(k[1:], "utf-8")
4.311 - yield k
4.312 + return (k[1:]
4.313 + for k in self.reader.iterkeys()
4.314 + if k[0] == ':')
4.315 +
4.316 + def article_texts(self): # generator of (title, text) for main-namespace articles
4.317 + return ((k[1:], v) # strip the leading ':' that marks article keys
4.318 + for k, v in self.reader.iteritems() # iteritems() yields (key, text) pairs
4.319 + if k[:1] == ':') # slice so an empty key cannot raise IndexError
4.320 +
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
5.2 +++ b/mwlib/dumpparser.py Thu Jul 03 17:11:44 2008 +0200
5.3 @@ -0,0 +1,210 @@
5.4 +import os
5.5 +import re
5.6 +
5.7 +try:
5.8 + from xml.etree import cElementTree
5.9 +except ImportError:
5.10 + import cElementTree
5.11 +
5.12 +ns = '{http://www.mediawiki.org/xml/export-0.3/}'
5.13 +class Tags:
5.14 +
5.15 + # <namespaces><namespace> inside <siteinfo>
5.16 + namespace = ns + 'namespaces/' + ns + 'namespace'
5.17 +
5.18 + page = ns + 'page'
5.19 +
5.20 + # <title> inside <page>
5.21 + title = ns + 'title'
5.22 +
5.23 + # <revision> inside <page>
5.24 + revision = ns + 'revision'
5.25 +
5.26 + # <id> inside <revision>
5.27 + revid = ns + 'id'
5.28 +
5.29 + # <contributor><username> inside <revision>
5.30 + username = ns + 'contributor/' + ns + 'username'
5.31 +
5.32 + # <text> inside <revision>
5.33 + text = ns + 'text'
5.34 +
5.35 + # <timestamp> inside <revision>
5.36 + timestamp = ns + 'timestamp'
5.37 +
5.38 + # <revision><text> inside <page>
5.39 + revision_text = ns + 'revision/' + ns + 'text'
5.40 +
5.41 + siteinfo = ns + "siteinfo"
5.42 +
5.43 +NS_MEDIA = -2
5.44 +NS_SPECIAL = -1
5.45 +NS_MAIN = 0
5.46 +NS_TALK = 1
5.47 +NS_USER = 2
5.48 +NS_USER_TALK = 3
5.49 +NS_PROJECT = 4
5.50 +NS_PROJECT_TALK = 5
5.51 +NS_IMAGE = 6
5.52 +NS_IMAGE_TALK = 7
5.53 +NS_MEDIAWIKI = 8
5.54 +NS_MEDIAWIKI_TALK = 9
5.55 +NS_TEMPLATE = 10
5.56 +NS_TEMPLATE_TALK = 11
5.57 +NS_HELP = 12
5.58 +NS_HELP_TALK = 13
5.59 +NS_CATEGORY = 14
5.60 +NS_CATEGORY_TALK = 15
5.61 +
5.62 +class Page(object):
5.63 + __slots__ = [
5.64 + 'title', 'pageid', 'namespace_text',
5.65 + 'namespace',
5.66 + 'revid', 'timestamp',
5.67 + 'username', 'userid',
5.68 + 'minor', 'comment', 'text'
5.69 + ]
5.70 +
5.71 + def __init__(self):
5.72 + self.namespace_text = ''
5.73 + self.namespace = NS_MAIN
5.74 +
5.75 + redirect_rex = re.compile(r'^#Redirect:?\s*?\[\[(?P<redirect>.*?)\]\]', re.IGNORECASE)
5.76 +
5.77 + @property
5.78 + def redirect(self):
5.79 + mo = self.redirect_rex.search(self.text)
5.80 + if mo:
5.81 + return mo.group('redirect').split("|", 1)[0]
5.82 + return None
5.83 +
5.84 + def __repr__(self):
5.85 + text = repr(self.text[:50])
5.86 + redir = self.redirect
5.87 + if redir:
5.88 + text = "Redirect to %s" % repr(redir)
5.89 + return 'Page(%s (@%s): %s)' % (repr(self.title), self.timestamp, text)
5.90 +
5.91 +
5.92 +class DumpParser(object):
5.93 + namespaces = {
5.94 + 'template': NS_TEMPLATE,
5.95 + 'vorlage': NS_TEMPLATE,
5.96 + 'category': NS_CATEGORY,
5.97 + 'kategorie': NS_CATEGORY,
5.98 + 'image': NS_IMAGE,
5.99 + 'bild': NS_IMAGE,
5.100 + 'wikipedia': NS_PROJECT,
5.101 + }
5.102 +
5.103 + default_namespaces = [NS_MAIN, NS_TEMPLATE]
5.104 +
5.105 + tags = Tags()
5.106 +
5.107 + def __init__(self, xmlfilename,
5.108 + namespace_filter=default_namespaces,
5.109 + ignore_redirects=False):
5.110 + self.xmlfilename = xmlfilename
5.111 + self.namespace_filter = namespace_filter
5.112 + self.ignore_redirects = ignore_redirects
5.113 +
5.114 + def openInputStream(self):
5.115 + if self.xmlfilename.lower().endswith(".bz2"):
5.116 + f = os.popen("bunzip2 -c %s" % self.xmlfilename, "r")
5.117 + elif self.xmlfilename.lower().endswith(".7z"):
5.118 + f = os.popen("7z -so x %s" % self.xmlfilename, "r")
5.119 + else:
5.120 + f = open(self.xmlfilename, "r")
5.121 +
5.122 + return f
5.123 +
5.124 + @staticmethod
5.125 + def getTag(elem):
5.126 + # rough is good enough
5.127 + return elem.tag[elem.tag.rindex('}')+1:]
5.128 +
5.129 + def handleSiteinfo(self, siteinfo):
5.130 + for nsElem in siteinfo.findall(self.tags.namespace):
5.131 + try:
5.132 + self.namespaces[nsElem.text.lower()] = int(nsElem.get('key'))
5.133 + except AttributeError:
5.134 + # text is probably None
5.135 + pass
5.136 +
5.137 + def __iter__(self):
5.138 + f = self.openInputStream()
5.139 +
5.140 + elemIter = (el for evt, el in cElementTree.iterparse(f))
5.141 + for elem in elemIter:
5.142 + if self.getTag(elem) == 'page':
5.143 + page = self.handlePageElement(elem)
5.144 + if page:
5.145 + yield page
5.146 + elem.clear()
5.147 + elif self.getTag(elem) == 'siteinfo':
5.148 + self.handleSiteinfo(elem)
5.149 + elem.clear()
5.150 +
5.151 + f.close()
5.152 +
5.153 + def handlePageElement(self, pageElem): # build a Page from a <page> element, or None if filtered out
5.154 + res = Page()
5.155 + lastRevision = None
5.156 + for el in pageElem:
5.157 + tag = self.getTag(el)
5.158 + if tag == 'title':
5.159 + title = unicode(el.text)
5.160 + if ':' in title:
5.161 + ns, rest = title.split(':', 1)
5.162 + res.namespace = self.namespaces.get(ns.lower(), NS_MAIN)
5.163 + if res.namespace: # unknown prefixes map to NS_MAIN (0) and keep the full title
5.164 + title = rest
5.165 + res.namespace_text = ns
5.166 + res.title = title
5.167 + if res.namespace not in self.namespace_filter:
5.168 + return None
5.169 +
5.170 + elif tag == 'id':
5.171 + res.pageid = int(el.text)
5.172 +
5.173 + elif tag == 'revision':
5.174 + lastRevision = el # later <revision> elements overwrite earlier ones
5.175 +
5.176 + if lastRevision is not None: # an Element without children is falsy, so test identity
5.177 + self.handleRevisionElement(lastRevision, res)
5.178 +
5.179 + if self.ignore_redirects and res.redirect:
5.180 + return None
5.181 +
5.182 + return res
5.183 +
5.184 + def handleRevisionElement(self, revElem, res): # copy fields of a <revision> element onto res; returns res
5.185 + for el in revElem:
5.186 + tag = self.getTag(el)
5.187 + if tag == 'id':
5.188 + res.revid = int(el.text)
5.189 + elif tag == 'timestamp':
5.190 + res.timestamp = el.text
5.191 + elif tag == 'contributor':
5.192 + pass
5.193 + #res.username, res.userid = self.handleContributorElement(el)
5.194 + elif tag == 'minor':
5.195 + res.minor = True
5.196 + elif tag == 'comment':
5.197 + res.comment = unicode(el.text or '') # el.text is None for an empty element; avoid u'None'
5.198 + elif tag == 'text':
5.199 + res.text = unicode(el.text or '') # el.text is None for an empty element; avoid u'None'
5.200 + el.clear()
5.201 +
5.202 + return res
5.203 +
5.204 + def handleContributorElement(self, conElem):
5.205 + username = None
5.206 + userid = None
5.207 + for el in conElem:
5.208 + if self.getTag(el) == 'username':
5.209 + username = unicode(el.text)
5.210 + elif self.getTag(el) == 'id':
5.211 + userid = int(el.text)
5.212 + return (username, userid)
5.213 +
6.1 --- a/mwlib/metabook.py Thu Jul 03 17:11:35 2008 +0200
6.2 +++ b/mwlib/metabook.py Thu Jul 03 17:11:44 2008 +0200
6.3 @@ -20,7 +20,7 @@
6.4 metabook['subtitle'] = subtitle
6.5 return metabook
6.6
6.7 -def make_source(name=None, url=None):
6.8 +def make_source(name=None, url=None, language=None):
6.9 source = {
6.10 'type': 'source',
6.11 'system': 'MediaWiki',
6.12 @@ -29,6 +29,8 @@
6.13 source['name'] = name
6.14 if url:
6.15 source['url'] = url
6.16 + if language:
6.17 + source['language'] = language
6.18 return source
6.19
6.20 def make_article(title=None, displaytitle=None, content_type='text/x-wiki'):
7.1 --- a/mwlib/mwapidb.py Thu Jul 03 17:11:35 2008 +0200
7.2 +++ b/mwlib/mwapidb.py Thu Jul 03 17:11:44 2008 +0200
7.3 @@ -416,6 +416,7 @@
7.4 self.template_blacklist = []
7.5 if template_blacklist is not None:
7.6 self.setTemplateBlacklist(template_blacklist)
7.7 + self.source = None
7.8
7.9 def setTemplateBlacklist(self, template_blacklist):
7.10 raw = self.getRawArticle(template_blacklist)
7.11 @@ -525,14 +526,18 @@
7.12 except KeyError:
7.13 return None
7.14
7.15 - def getMetaData(self):
7.16 + def getSource(self):
7.17 + if self.source is not None:
7.18 + return self.source
7.19 result = self.api_helper.query(meta='siteinfo')
7.20 try:
7.21 g = result['general']
7.22 - return metabook.make_source(
7.23 + self.source = metabook.make_source(
7.24 url=g['base'],
7.25 name='%s (%s)' % (g['sitename'], g['lang']),
7.26 + language=g['lang'],
7.27 )
7.28 + return self.source
7.29 except KeyError:
7.30 return None
7.31
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
8.2 +++ b/mwlib/namespace.py Thu Jul 03 17:11:44 2008 +0200
8.3 @@ -0,0 +1,75 @@
8.4 +from mwlib.namespace_langs import lang_ns_data as _lang_ns_data
8.5 +
8.6 +NS_MEDIA = -2
8.7 +NS_SPECIAL = -1
8.8 +NS_MAIN = 0
8.9 +NS_TALK = 1
8.10 +NS_USER = 2
8.11 +NS_USER_TALK = 3
8.12 +NS_PROJECT = 4
8.13 +NS_PROJECT_TALK = 5
8.14 +NS_IMAGE = 6
8.15 +NS_IMAGE_TALK = 7
8.16 +NS_MEDIAWIKI = 8
8.17 +NS_MEDIAWIKI_TALK = 9
8.18 +NS_TEMPLATE = 10
8.19 +NS_TEMPLATE_TALK = 11
8.20 +NS_HELP = 12
8.21 +NS_HELP_TALK = 13
8.22 +NS_CATEGORY = 14
8.23 +NS_CATEGORY_TALK = 15
8.24 +
8.25 +namespace_maps = {}
8.26 +
8.27 +def add_namespace_map(key, lang, project_name, extras={}):
8.28 + ns_data = _lang_ns_data[lang]
8.29 + res = dict(zip(ns_data, _lang_ns_data_keys))
8.30 + res[project_name] = NS_PROJECT
8.31 + res[ns_data[-1] % project_name] = NS_PROJECT_TALK
8.32 + res.update(extras)
8.33 + namespace_maps[key] = res
8.34 +
8.35 +_lang_ns_data_keys = [
8.36 + NS_TALK, NS_USER, NS_USER_TALK, NS_IMAGE, NS_IMAGE_TALK,
8.37 + NS_MEDIAWIKI, NS_MEDIAWIKI_TALK, NS_TEMPLATE, NS_TEMPLATE_TALK,
8.38 + NS_HELP, NS_HELP_TALK, NS_CATEGORY, NS_CATEGORY_TALK, NS_SPECIAL, NS_MEDIA
8.39 +]
8.40 +
8.41 +add_namespace_map('enwiki', 'en', 'Wikipedia',
8.42 + {'Portal': 100, 'Portal_Talk': 101})
8.43 +add_namespace_map('dewiki', 'de', 'Wikipedia',
8.44 + {'Portal': 100, 'Portal_Diskussion': 101})
8.45 +for lang in _lang_ns_data:
8.46 + add_namespace_map('%s+en_mw' % lang, lang, 'MediaWiki', namespace_maps['enwiki'])
8.47 +
8.48 +namespace_maps['default'] = dict(namespace_maps['enwiki'].items() + namespace_maps['dewiki'].items())
8.49 +
8.50 +# external wikis:
8.51 +
8.52 +interwiki_map = {
8.53 + 'wikipedia': 'wikipedia',
8.54 + 'w': 'wikipedia',
8.55 + 'wiktionary': 'wiktionary',
8.56 + 'wikt': 'wiktionary',
8.57 + 'wikinews': 'wikinews',
8.58 + 'n': 'wikinews',
8.59 + 'wikibooks': 'wikibooks',
8.60 + 'b': 'wikibooks',
8.61 + 'wikiquote': 'wikiquote',
8.62 + 'q': 'wikiquote',
8.63 + 'wikisource': 'wikisource',
8.64 + 's': 'wikisource',
8.65 + 'wikispecies': 'wikispecies',
8.66 + 'species': 'wikispecies',
8.67 + 'v': 'wikiversity',
8.68 + 'wikimedia': 'wikimedia',
8.69 + 'foundation': 'wikimedia',
8.70 + 'commons': 'commons',
8.71 + 'meta': 'meta',
8.72 + 'm': 'meta',
8.73 + 'incubator': 'incubator',
8.74 + 'mw': 'mw',
8.75 + 'mediazilla': 'mediazilla',
8.76 +
8.77 + 'wikitravel': 'wikitravel',
8.78 +}
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
9.2 +++ b/mwlib/namespace_langs.py Thu Jul 03 17:11:44 2008 +0200
9.3 @@ -0,0 +1,76 @@
9.4 +lang_ns_data = {
9.5 +'af': [u'Bespreking', u'Gebruiker', u'Gebruikerbespreking', u'Beeld', u'Beeldbespreking', u'MediaWiki', u'MediaWikibespreking', u'Sjabloon', u'Sjabloonbespreking', u'Hulp', u'Hulpbespreking', u'Kategorie', u'Kategoriebespreking', u'Spesiaal', u'Media', u'%sbespreking'],
9.6 +'an': [u'Descusi\xf3n', u'Usuario', u'Descusi\xf3n_usuario', u'Imachen', u'Descusi\xf3n_imachen', u'MediaWiki', u'Descusi\xf3n_MediaWiki', u'Plantilla', u'Descusi\xf3n_plantilla', u'Aduya', u'Descusi\xf3n_aduya', u'Categor\xeda', u'Descusi\xf3n_categor\xeda', u'Espezial', u'Media', u'Descusi\xf3n_%s'],
9.7 +'ar': [u'\u0646\u0642\u0627\u0634', u'\u0645\u0633\u062a\u062e\u062f\u0645', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u0645\u0633\u062a\u062e\u062f\u0645', u'\u0635\u0648\u0631\u0629', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u0635\u0648\u0631\u0629', u'\u0645\u064a\u062f\u064a\u0627\u0648\u064a\u0643\u064a', u'\u0646\u0642\u0627\u0634_\u0645\u064a\u062f\u064a\u0627\u0648\u064a\u0643\u064a', u'\u0642\u0627\u0644\u0628', u'\u0646\u0642\u0627\u0634_\u0642\u0627\u0644\u0628', u'\u0645\u0633\u0627\u0639\u062f\u0629', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u0645\u0633\u0627\u0639\u062f\u0629', u'\u062a\u0635\u0646\u064a\u0641', u'\u0646\u0642\u0627\u0634_\u0627\u0644\u062a\u0635\u0646\u064a\u0641', u'\u062e\u0627\u0635', u'\u0645\u0644\u0641', u"\u0646\u0642\u0627\u0634' . '_%s"],
9.8 +'az': [u'M\xfczakir\u0259', u'\u0130stifad\u0259\xe7i', u'\u0130stifad\u0259\xe7i_m\xfczakir\u0259si', u'\u015e\u0259kil', u'\u015e\u0259kil_m\xfczakir\u0259si', u'MediyaViki', u'MediyaViki_m\xfczakir\u0259si', u'\u015eablon', u'\u015eablon_m\xfczakir\u0259si', u'K\xf6m\u0259k', u'K\xf6m\u0259k_m\xfczakir\u0259si', u'Kateqoriya', u'Kateqoriya_m\xfczakir\u0259si', u'X\xfcsusi', u'Mediya', u'%s_m\xfczakir\u0259si'],
9.9 +'ba': [u'\u0424\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u04a0\u0430\u0442\u043d\u0430\u0448\u044b\u0443\u0441\u044b', u'\u04a0\u0430\u0442\u043d\u0430\u0448\u044b\u0443\u0441\u044b_\u043c-\u043d_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u0420\u04d9\u0441\u0435\u043c', u'\u0420\u04d9\u0441\u0435\u043c_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'MediaWiki', u'MediaWiki_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u04a0\u0430\u043b\u044b\u043f', u'\u04a0\u0430\u043b\u044b\u043f_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u0411\u0435\u043b\u0435\u0448\u043c\u04d9', u'\u0411\u0435\u043b\u0435\u0448\u043c\u04d9_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443', u'\u042f\u0440\u0499\u0430\u043c\u0441\u044b', u'\u041c\u0435\u0434\u0438\u0430', u'%s_\u0431-\u0441\u0430_\u0444\u0435\u043a\u0435\u0440_\u0430\u043b\u044b\u0448\u044b\u0443'],
9.10 +'be': [u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435', u'\u0423\u0434\u0437\u0435\u043b\u044c\u043d\u0456\u043a', u'\u0413\u0443\u0442\u0430\u0440\u043a\u0456_\u045e\u0434\u0437\u0435\u043b\u044c\u043d\u0456\u043a\u0430', u'\u0412\u044b\u044f\u0432\u0430', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u0432\u044b\u044f\u0432\u044b', u'MediaWiki', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_MediaWiki', u'\u0428\u0430\u0431\u043b\u0451\u043d', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u0448\u0430\u0431\u043b\u0451\u043d\u0443', u'\u0414\u0430\u043f\u0430\u043c\u043e\u0433\u0430', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u0434\u0430\u043f\u0430\u043c\u043e\u0433\u0456', u'\u041a\u0430\u0442\u044d\u0433\u043e\u0440\u044b\u044f', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_\u043a\u0430\u0442\u044d\u0433\u043e\u0440\u044b\u0456', u'\u0421\u043f\u044d\u0446\u044b\u044f\u043b\u044c\u043d\u044b\u044f', u'\u041c\u044d\u0434\u044b\u044f', u'\u0410\u0431\u043c\u0435\u0440\u043a\u0430\u0432\u0430\u043d\u044c\u043d\u0435_%s'],
9.11 +'bg': [u'\u0411\u0435\u0441\u0435\u0434\u0430', u'\u041f\u043e\u0442\u0440\u0435\u0431\u0438\u0442\u0435\u043b', u'\u041f\u043e\u0442\u0440\u0435\u0431\u0438\u0442\u0435\u043b_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041a\u0430\u0440\u0442\u0438\u043d\u043a\u0430', u'\u041a\u0430\u0440\u0442\u0438\u043d\u043a\u0430_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041c\u0435\u0434\u0438\u044f\u0423\u0438\u043a\u0438', u'\u041c\u0435\u0434\u0438\u044f\u0423\u0438\u043a\u0438_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0428\u0430\u0431\u043b\u043e\u043d_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041f\u043e\u043c\u043e\u0449', u'\u041f\u043e\u043c\u043e\u0449_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f_\u0431\u0435\u0441\u0435\u0434\u0430', u'\u0421\u043f\u0435\u0446\u0438\u0430\u043b\u043d\u0438', u'\u041c\u0435\u0434\u0438\u044f', u'%s_\u0431\u0435\u0441\u0435\u0434\u0430'],
9.12 +'br': [u'Kaozeal', u'Implijer', u'Kaozeadenn_Implijer', u'Skeudenn', u'Kaozeadenn_Skeudenn', u'MediaWiki', u'Kaozeadenn_MediaWiki', u'Patrom', u'Kaozeadenn_Patrom',u'Skoazell', u'Kaozeadenn_Skoazell', u'Rummad', u'Kaozeadenn_Rummad', u'Dibar', u'Media', u'Kaozeadenn_%s'],
9.13 +'bs': [u'Razgovor', u'Korisnik', u'Razgovor_sa_korisnikom', u'Slika', u'Razgovor_o_slici', u'MedijaViki', u'Razgovor_o_MedijaVikiju', u'\u0160ablon', u'Razgovor_o_\u0161ablonu', u'Pomo\u0107', u'Razgovor_o_pomo\u0107i', u'Kategorija', u'Razgovor_o_kategoriji', u'Posebno', u'Medija', u'Razgovor_{{grammar:instrumental|%s}}'],
9.14 +'ca': [u'Discussi\xf3', u'Usuari', u'Usuari_Discussi\xf3', u'Imatge', u'Imatge_Discussi\xf3', u'MediaWiki', u'MediaWiki_Discussi\xf3', u'Plantilla', u'Plantilla_Discussi\xf3', u'Ajuda', u'Ajuda_Discussi\xf3', u'Categoria', u'Categoria_Discussi\xf3', u'Especial', u'Media', u'%s_Discussi\xf3'],
9.15 +'cs': [u'Diskuse', u'U\u017eivatel', u'U\u017eivatel_diskuse', u'Soubor', u'Soubor_diskuse', u'MediaWiki', u'MediaWiki_diskuse', u'\u0160ablona', u'\u0160ablona_diskuse', u'N\xe1pov\u011bda', u'N\xe1pov\u011bda_diskuse', u'Kategorie', u'Kategorie_diskuse', u'Speci\xe1ln\xed', u'M\xe9dia', u'%s_diskuse'],
9.16 +'cv': [u'\u0421\u04f3\u0442\u0441\u0435 \u044f\u0432\u0430\u0441\u0441\u0438', u'\u0425\u0443\u0442\u0448\u0103\u043d\u0430\u043a\u0430\u043d', u'\u0425\u0443\u0442\u0448\u0103\u043d\u0430\u043a\u0430\u043d\u0103\u043d_\u043a\u0430\u043d\u0430\u0448\u043b\u0443_\u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0438', u'\u04f2\u043a\u0435\u0440\u0447\u0115\u043a', u'\u04f2\u043a\u0435\u0440\u0447\u0115\u043a\u0435_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'MediaWiki', u'MediaWiki_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0428\u0430\u0431\u043b\u043e\u043d\u0430_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u041f\u0443\u043b\u0103\u0448\u0443', u'\u041f\u0443\u043b\u0103\u0448\u0103\u0432\u0430_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u043d\u0435_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438', u'\u042f\u0442\u0430\u0440\u043b\u0103', u'\u041c\u0435\u0434\u0438\u0430', u'%s_\u0441\u04f3\u0442\u0441\u0435_\u044f\u0432\u043c\u0430\u043b\u043b\u0438'],
9.17 +'cy': [u'Sgwrs', u'Defnyddiwr', u'Sgwrs_Defnyddiwr', u'Delwedd', u'Sgwrs_Delwedd', u'MediaWici', u'Sgwrs_MediaWici', u'Nodyn', u'Sgwrs_Nodyn', u'Cymorth', u'Sgwrs Cymorth', u'Categori', u'Sgwrs_Categori', u'Arbennig', u'Media', u'Sgwrs_%s'],
9.18 +'da': [u'Diskussion', u'Bruger', u'Brugerdiskussion', u'Billede', u'Billeddiskussion', u'MediaWiki', u'MediaWiki-diskussion', u'Skabelon', u'Skabelondiskussion', u'Hj\xe6lp', u'Hj\xe6lp-diskussion', u'Kategori', u'Kategoridiskussion', u'Speciel', u'Media', u'%s-diskussion'],
9.19 +'de': [u'Diskussion', u'Benutzer', u'Benutzer_Diskussion', u'Bild', u'Bild_Diskussion', u'MediaWiki', u'MediaWiki_Diskussion', u'Vorlage', u'Vorlage_Diskussion', u'Hilfe', u'Hilfe_Diskussion', u'Kategorie', u'Kategorie_Diskussion', u'Spezial', u'Media', u'%s_Diskussion'],
9.20 +'el': [u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7', u'\u03a7\u03c1\u03ae\u03c3\u03c4\u03b7\u03c2', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03c7\u03c1\u03ae\u03c3\u03c4\u03b7', u'\u0395\u03b9\u03ba\u03cc\u03bd\u03b1', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03b5\u03b9\u03ba\u03cc\u03bd\u03b1\u03c2', u'MediaWiki', u'MediaWiki_talk', u'\u03a0\u03c1\u03cc\u03c4\u03c5\u03c0\u03bf', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03c0\u03c1\u03bf\u03c4\u03cd\u03c0\u03bf\u03c5', u'\u0392\u03bf\u03ae\u03b8\u03b5\u03b9\u03b1', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03b2\u03bf\u03ae\u03b8\u03b5\u03b9\u03b1\u03c2', u'\u039a\u03b1\u03c4\u03b7\u03b3\u03bf\u03c1\u03af\u03b1', u'\u03a3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7_\u03ba\u03b1\u03c4\u03b7\u03b3\u03bf\u03c1\u03af\u03b1\u03c2', u'\u0395\u03b9\u03b4\u03b9\u03ba\u03cc', u'\u039c\u03ad\u03c3\u03bf\u03bd', u'%s_\u03c3\u03c5\u03b6\u03ae\u03c4\u03b7\u03c3\u03b7'],
9.21 +'en': [u'Talk', u'User', u'User_talk', u'Image', u'Image_talk', u'MediaWiki', u'MediaWiki_talk', u'Template', u'Template_talk', u'Help', u'Help_talk', u'Category', u'Category_talk', u'Special', u'Media', u'%s_talk'],
9.22 +'eo': [u'Diskuto', u'Vikipediisto', u'Vikipediista_diskuto', u'Dosiero', u'Dosiera_diskuto', u'MediaWiki', u'MediaWiki_diskuto', u'\u015cablono', u'\u015cablona_diskuto', u'Helpo', u'Helpa_diskuto', u'Kategorio', u'Kategoria_diskuto', u'Speciala', u'Media', u'%s_diskuto'],
9.23 +'es': [u'Discusi\xf3n', u'Usuario', u'Usuario_Discusi\xf3n', u'Imagen', u'Imagen_Discusi\xf3n', u'MediaWiki', u'MediaWiki_Discusi\xf3n', u'Plantilla', u'Plantilla_Discusi\xf3n', u'Ayuda', u'Ayuda_Discusi\xf3n', u'Categor\xeda', u'Categor\xeda_Discusi\xf3n', u'Especial', u'Media', u'%s_Discusi\xf3n'],
9.24 +'et': [u'Arutelu', u'Kasutaja', u'Kasutaja_arutelu', u'Pilt', u'Pildi_arutelu', u'MediaWiki', u'MediaWiki_arutelu', u'Mall', u'Malli_arutelu', u'Juhend', u'Juhendi_arutelu', u'Kategooria', u'Kategooria_arutelu', u'Eri', u'Meedia', u'%s_arutelu'],
9.25 +'eu': [u'Eztabaida', u'Lankide', u'Lankide_eztabaida', u'Irudi', u'Irudi_eztabaida', u'MediaWiki', u'MediaWiki_eztabaida', u'Txantiloi', u'Txantiloi_eztabaida', u'Laguntza', u'Laguntza_eztabaida', u'Kategoria', u'Kategoria_eztabaida', u'Aparteko', u'Media', u'%s_eztabaida'],
9.26 +'fa': [u'\u0628\u062d\u062b', u'\u06a9\u0627\u0631\u0628\u0631', u'\u0628\u062d\u062b_\u06a9\u0627\u0631\u0628\u0631', u'\u062a\u0635\u0648\u06cc\u0631', u'\u0628\u062d\u062b_\u062a\u0635\u0648\u06cc\u0631', u'\u0645\u062f\u06cc\u0627\u0648\u06cc\u06a9\u06cc', u'\u0628\u062d\u062b_\u0645\u062f\u06cc\u0627\u0648\u06cc\u06a9\u06cc', u'\u0627\u0644\u06af\u0648', u'\u0628\u062d\u062b_\u0627\u0644\u06af\u0648', u'\u0631\u0627\u0647\u0646\u0645\u0627', u'\u0628\u062d\u062b_\u0631\u0627\u0647\u0646\u0645\u0627', u'\u0631\u062f\u0647', u'\u0628\u062d\u062b_\u0631\u062f\u0647', u'\u0648\u06cc\u0698\u0647', u'\u0645\u062f\u06cc\u0627', u'\u0628\u062d\u062b_%s'],
9.27 +'fi': [u'Keskustelu', u'K\xe4ytt\xe4j\xe4', u'Keskustelu_k\xe4ytt\xe4j\xe4st\xe4', u'Kuva', u'Keskustelu_kuvasta', u'MediaWiki', u'MediaWiki_talk', u'Malline', u'Keskustelu_mallineesta', u'Ohje', u'Keskustelu_ohjeesta', u'Luokka', u'Keskustelu_luokasta', u'Toiminnot', u'Media', u'Keskustelu_{{grammar:elative|%s}}'],
9.28 +'fo': [u'Kjak', u'Br\xfakari', u'Br\xfakari_kjak', u'Mynd', u'Mynd_kjak', u'MidiaWiki', u'MidiaWiki_kjak', u'Fyrimynd', u'Fyrimynd_kjak', u'Hj\xe1lp', u'Hj\xe1lp_kjak', u'B\xf3lkur', u'B\xf3lkur_kjak', u'Serstakur', u'Mi\xf0il', u'%s_kjak'],
9.29 +'fr': [u'Discuter', u'Utilisateur', u'Discussion_Utilisateur', u'Image', u'Discussion_Image', u'MediaWiki', u'Discussion_MediaWiki', u'Mod\xe8le', u'Discussion_Mod\xe8le', u'Aide', u'Discussion_Aide', u'Cat\xe9gorie', u'Discussion_Cat\xe9gorie', u'Special', u'Media', u'Discussion_%s'],
9.30 +'fy': [u'Oerlis', u'Meidogger', u'Meidogger_oerlis', u'Ofbyld', u'Ofbyld_oerlis', u'MediaWiki', u'MediaWiki_oerlis', u'Berjocht', u'Berjocht_oerlis', u'Hulp', u'Hulp_oerlis', u'Kategory', u'Kategory_oerlis', u'Wiki', u'Media', u'%s_oerlis'],
9.31 +'ga': [u'Pl\xe9', u'\xdas\xe1ideoir', u'Pl\xe9_\xfas\xe1ideora', u'\xcdomh\xe1', u'Pl\xe9_\xedomh\xe1', u'MediaWiki', u'Pl\xe9_MediaWiki', u'Teimpl\xe9ad', u'Pl\xe9_teimpl\xe9id', u'Cabhair', u'Pl\xe9_cabhrach', u'Catag\xf3ir', u'Pl\xe9_catag\xf3ire', u'Speisialta', u'Me\xe1n', u'Pl\xe9_{{grammar:genitive|%s}}'],
9.32 +'he': [u'\u05e9\u05d9\u05d7\u05d4', u'\u05de\u05e9\u05ea\u05de\u05e9', u'\u05e9\u05d9\u05d7\u05ea_\u05de\u05e9\u05ea\u05de\u05e9', u'\u05ea\u05de\u05d5\u05e0\u05d4', u'\u05e9\u05d9\u05d7\u05ea_\u05ea\u05de\u05d5\u05e0\u05d4', u'\u05de\u05d3\u05d9\u05d4_\u05d5\u05d9\u05e7\u05d9', u'\u05e9\u05d9\u05d7\u05ea_\u05de\u05d3\u05d9\u05d4_\u05d5\u05d9\u05e7\u05d9', u'\u05ea\u05d1\u05e0\u05d9\u05ea', u'\u05e9\u05d9\u05d7\u05ea_\u05ea\u05d1\u05e0\u05d9\u05ea', u'\u05e2\u05d6\u05e8\u05d4', u'\u05e9\u05d9\u05d7\u05ea_\u05e2\u05d6\u05e8\u05d4', u'\u05e7\u05d8\u05d2\u05d5\u05e8\u05d9\u05d4', u'\u05e9\u05d9\u05d7\u05ea_\u05e7\u05d8\u05d2\u05d5\u05e8\u05d9\u05d4', u'\u05de\u05d9\u05d5\u05d7\u05d3', u'\u05de\u05d3\u05d9\u05d4', u'\u05e9\u05d9\u05d7\u05ea_%s'],
9.33 +'hi': [u'\u0935\u093e\u0930\u094d\u0924\u093e', u'\u0938\u0926\u0938\u094d\u092f', u'\u0938\u0926\u0938\u094d\u092f_\u0935\u093e\u0930\u094d\u0924\u093e', u'\u091a\u093f\u0924\u094d\u0930', u'\u091a\u093f\u0924\u094d\u0930_\u0935\u093e\u0930\u094d\u0924\u093e', u'MediaWiki', u'MediaWiki_talk', u'Template', u'Template_talk', u'Help', u'Help_Talk', u'\u0936\u094d\u0930\u0947\u0923\u0940', u'\u0936\u094d\u0930\u0947\u0923\u0940_\u0935\u093e\u0930\u094d\u0924\u093e', u'\u0935\u093f\u0936\u0947\u0937', u'Media', u'%s_\u0935\u093e\u0930\u094d\u0924\u093e'],
9.34 +'hr': [u'Razgovor', u'Suradnik', u'Razgovor_sa_suradnikom', u'Slika', u'Razgovor_o_slici', u'MediaWiki', u'MediaWiki_razgovor', u'Predlo\u017eak', u'Razgovor_o_predlo\u0161ku', u'Pomo\u0107', u'Razgovor_o_pomo\u0107i', u'Kategorija', u'Razgovor_o_kategoriji', u'Posebno', u'Mediji', u'Razgovor_%s'],
9.35 +'hu': [u'Vita', u'User', u'User_vita', u'K\xe9p', u'K\xe9p_vita', u'MediaWiki', u'MediaWiki_vita', u'Sablon', u'Sablon_vita', u'Seg\xedts\xe9g', u'Seg\xedts\xe9g_vita', u'Kateg\xf3ria', u'Kateg\xf3ria_vita', u'Speci\xe1lis', u'M\xe9dia', u'%s_vita'],
9.36 +'ia': [u'Discussion', u'Usator', u'Discussion_Usator', u'Imagine', u'Discussion_Imagine', u'MediaWiki', u'Discussion_MediaWiki', u'Patrono', u'Discussion_Patrono', u'Adjuta', u'Discussion_Adjuta', u'Categoria', u'Discussion_Categoria', u'Special', u'Media', u'Discussion_%s'],
9.37 +'id': [u'Pembicaraan', u'Pengguna', u'Pembicaraan_Pengguna', u'Berkas', u'Pembicaraan_Berkas', u'MediaWiki', u'Pembicaraan_MediaWiki', u'Templat', u'Pembicaraan_Templat', u'Bantuan', u'Pembicaraan_Bantuan', u'Kategori', u'Pembicaraan_Kategori', u'Istimewa', u'Media', u'Pembicaraan_%s'],
9.38 +'is': [u'Spjall', u'Notandi', u'Notandaspjall', u'Mynd', u'Myndaspjall', u'Melding', u'Meldingarspjall', u'Sni\xf0', u'Sni\xf0aspjall', u'Hj\xe1lp', u'Hj\xe1lparspjall', u'Flokkur', u'Flokkaspjall', u'Kerfiss\xed\xf0a', u'Mi\xf0ill', u'%sspjall'],
9.39 +'it': [u'Discussione', u'Utente', u'Discussioni_utente', u'Immagine', u'Discussioni_immagine', u'MediaWiki', u'Discussioni_MediaWiki', u'Template', u'Discussioni_template', u'Aiuto', u'Discussioni_aiuto', u'Categoria', u'Discussioni_categoria', u'Speciale', u'Media', u'Discussioni_%s'],
9.40 +'ja': [u'\u30ce\u30fc\u30c8', u'\u5229\u7528\u8005', u'\u5229\u7528\u8005\u2010\u4f1a\u8a71', u'\u753b\u50cf', u'\u753b\u50cf\u2010\u30ce\u30fc\u30c8', u'MediaWiki', u'MediaWiki\u2010\u30ce\u30fc\u30c8', u'Template', u'Template\u2010\u30ce\u30fc\u30c8', u'Help', u'Help\u2010\u30ce\u30fc\u30c8', u'Category', u'Category\u2010\u30ce\u30fc\u30c8', u'\u7279\u5225', u'Media', u'%s\u2010\u30ce\u30fc\u30c8'],
9.41 +'jv': [u'Dhiskusi', u'Panganggo', u'Dhiskusi_Panganggo', u'Gambar', u'Dhiskusi_Gambar', u'MediaWiki', u'Dhiskusi_MediaWiki', u'Cithakan', u'Dhiskusi_Cithakan', u'Pitulung', u'Dhiskusi_Pitulung', u'Kategori', u'Dhiskusi_Kategori', u'Astamiwa', u'Media', u'Dhiskusi_%s'],
9.42 +'ka': [u'\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10db\u10dd\u10db\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d4\u10da\u10d8', u'\u10db\u10dd\u10db\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d4\u10da\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10e1\u10e3\u10e0\u10d0\u10d7\u10d8', u'\u10e1\u10e3\u10e0\u10d0\u10d7\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10db\u10d4\u10d3\u10d8\u10d0\u10d5\u10d8\u10d9\u10d8', u'\u10db\u10d4\u10d3\u10d8\u10d0\u10d5\u10d8\u10d9\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10d7\u10d0\u10e0\u10d2\u10d8', u'\u10d7\u10d0\u10e0\u10d2\u10d8_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10d3\u10d0\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d0', u'\u10d3\u10d0\u10ee\u10db\u10d0\u10e0\u10d4\u10d1\u10d0_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10d9\u10d0\u10e2\u10d4\u10d2\u10dd\u10e0\u10d8\u10d0', u'\u10d9\u10d0\u10e2\u10d4\u10d2\u10dd\u10e0\u10d8\u10d0_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0', u'\u10e1\u10de\u10d4\u10ea\u10d8\u10d0\u10da\u10e3\u10e0\u10d8', u'\u10db\u10d4\u10d3\u10d8\u10d0', u'%s_\u10d2\u10d0\u10dc\u10ee\u10d8\u10da\u10d5\u10d0'],
9.43 +'kn': [u'\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6\u0caa\u0cc1\u0c9f', u'\u0cb8\u0ca6\u0cb8\u0ccd\u0caf', u'\u0cb8\u0ca6\u0cb8\u0ccd\u0caf\u0cb0_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6\u0caa\u0cc1\u0c9f', u'\u0c9a\u0cbf\u0ca4\u0ccd\u0cb0', u'\u0c9a\u0cbf\u0ca4\u0ccd\u0cb0_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6\u0caa\u0cc1\u0c9f', u'\u0cae\u0cc0\u0ca1\u0cbf\u0caf\u0cb5\u0cbf\u0c95\u0cbf', u'\u0cae\u0cc0\u0ca1\u0cc0\u0caf\u0cb5\u0cbf\u0c95\u0cbf_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0c9f\u0cc6\u0c82\u0caa\u0ccd\u0cb2\u0cc7\u0c9f\u0cc1', u'\u0c9f\u0cc6\u0c82\u0caa\u0ccd\u0cb2\u0cc7\u0c9f\u0cc1_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0cb8\u0cb9\u0cbe\u0caf', u'\u0cb8\u0cb9\u0cbe\u0caf_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0cb5\u0cb0\u0ccd\u0c97', u'\u0cb5\u0cb0\u0ccd\u0c97_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6', u'\u0cb5\u0cbf\u0cb6\u0cc7\u0cb7', u'\u0cae\u0cc0\u0ca1\u0cbf\u0caf', u'%s_\u0c9a\u0cb0\u0ccd\u0c9a\u0cc6'],
9.44 +'ko': [u'\ud1a0\ub860', u'\uc0ac\uc6a9\uc790', u'\uc0ac\uc6a9\uc790\ud1a0\ub860', u'\uadf8\ub9bc', u'\uadf8\ub9bc\ud1a0\ub860', u'MediaWiki', u'MediaWiki\ud1a0\ub860', u'\ud2c0', u'\ud2c0\ud1a0\ub860', u'\ub3c4\uc6c0\ub9d0', u'\ub3c4\uc6c0\ub9d0\ud1a0\ub860', u'\ubd84\ub958', u'\ubd84\ub958\ud1a0\ub860', u'\ud2b9\uc218\uae30\ub2a5', u'Media', u'%s\ud1a0\ub860'],
9.45 +'ku': [u'N\xeeqa\u015f', u'Bikarh\xeaner', u'Bikarh\xeaner_n\xeeqa\u015f', u'W\xeane', u'W\xeane_n\xeeqa\u015f', u'MediaWiki', u'MediaWiki_n\xeeqa\u015f', u'\u015eablon', u'\u015eablon_n\xeeqa\u015f', u'Al\xeekar\xee', u'Al\xeekar\xee_n\xeeqa\u015f', u'Kategor\xee', u'Kategor\xee_n\xeeqa\u015f', u'Taybet', u'Medya', u'%s_n\xeeqa\u015f'],
9.46 +'la': [u'Disputatio', u'Usor', u'Disputatio_Usoris', u'Imago', u'Disputatio_Imaginis', u'MediaWiki', u'Disputatio_MediaWiki', u'Formula', u'Disputatio_Formulae', u'Auxilium', u'Disputatio_Auxilii', u'Categoria', u'Disputatio_Categoriae', u'Specialis', u'Media', u'Disputatio_{{grammar:genitive|%s}}'],
9.47 +'li': [u'Euverlik', u'Gebroeker', u'Euverlik_gebroeker', u'Aafbeilding', u'Euverlik_afbeelding', u'MediaWiki', u'Euverlik_MediaWiki', u'Sjabloon', u'Euverlik_sjabloon', u'Help', u'Euverlik_help', u'Kategorie', u'Euverlik_kategorie', u'Speciaal', u'Media', u'Euverlik_%s'],
9.48 +'lt': [u'Aptarimas', u'Naudotojas', u'Naudotojo_aptarimas', u'Vaizdas', u'Vaizdo_aptarimas', u'MediaWiki', u'MediaWiki_aptarimas', u'\u0160ablonas', u'\u0160ablono_aptarimas', u'Pagalba', u'Pagalbos_aptarimas', u'Kategorija', u'Kategorijos_aptarimas', u'Specialus', u'Medija', u'%s_aptarimas'],
9.49 +'lv': [u'Diskusija', u'Lietot\u0101js', u'Lietot\u0101ja_diskusija', u'Att\u0113ls', u'Att\u0113la_diskusija', u'MediaWiki', u'MediaWiki_diskusija', u'Veidne', u'Veidnes_diskusija', u'Pal\u012bdz\u012bba', u'Pal\u012bdz\u012bbas_diskusija', u'Kategorija', u'Kategorijas_diskusija', u'Special', u'Media', u'{{grammar:\u0123enit\u012bvs|%s}}_diskusija'],
9.50 +'mk': [u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440', u'\u041a\u043e\u0440\u0438\u0441\u043d\u0438\u043a', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0441\u043e_\u043a\u043e\u0440\u0438\u0441\u043d\u0438\u043a', u'\u0421\u043b\u0438\u043a\u0430', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u0441\u043b\u0438\u043a\u0430', u'\u041c\u0435\u0434\u0438\u0458\u0430\u0412\u0438\u043a\u0438', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u041c\u0435\u0434\u0438\u0458\u0430\u0412\u0438\u043a\u0438', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u0448\u0430\u0431\u043b\u043e\u043d', u'\u041f\u043e\u043c\u043e\u0448', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u043f\u043e\u043c\u043e\u0448', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0458\u0430', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_\u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0458\u0430', u'\u0421\u043f\u0435\u0446\u0438\u0458\u0430\u043b\u043d\u0438', u'\u041c\u0435\u0434\u0438\u0458\u0430', u'\u0420\u0430\u0437\u0433\u043e\u0432\u043e\u0440_\u0437\u0430_%s'],
9.51 +'ms': [u'Perbualan', u'Pengguna', u'Perbualan_Pengguna', u'Imej', u'Imej_Perbualan', u'MediaWiki', u'MediaWiki_Perbualan', u'Templat', u'Perbualan_Templat', u'Bantuan', u'Perbualan_Bantuan', u'Kategori', u'Perbualan_Kategori', u'Istimewa', u'Media', u'Perbualan_%s'],
9.52 +'nl': [u'Overleg', u'Gebruiker', u'Overleg_gebruiker', u'Afbeelding', u'Overleg_afbeelding', u'MediaWiki', u'Overleg_MediaWiki', u'Sjabloon', u'Overleg_sjabloon', u'Help', u'Overleg_help', u'Categorie', u'Overleg_categorie', u'Speciaal', u'Media', u'Overleg_%s'],
9.53 +'nn': [u'Diskusjon', u'Brukar', u'Brukardiskusjon', u'Fil', u'Fildiskusjon', u'MediaWiki', u'MediaWiki-diskusjon', u'Mal', u'Maldiskusjon', u'Hjelp', u'Hjelpdiskusjon', u'Kategori', u'Kategoridiskusjon', u'Spesial', u'Filpeikar', u'%s-diskusjon'],
9.54 +'no': [u'Diskusjon', u'Bruker', u'Brukerdiskusjon', u'Bilde', u'Bildediskusjon', u'MediaWiki', u'MediaWiki-diskusjon', u'Mal', u'Maldiskusjon', u'Hjelp', u'Hjelpdiskusjon', u'Kategori', u'Kategoridiskusjon', u'Spesial', u'Medium', u'%s-diskusjon'],
9.55 +'nv': [u"Naaltsoos_baa_yin\xedsht'\u012f\u0301", u"Choinish'\u012f\u012fh\xed", u"Choinish'\u012f\u012fh\xed_baa_yin\xedsht'\u012f\u0301", u"E'elyaa\xedg\xed\xed", u"E'elyaa\xedg\xed\xed_baa_yin\xedsht'\u012f\u0301", u'MediaWiki', u"MediaWiki_baa_yin\xedsht'\u012f\u0301", u'Template', u'Template_talk', u"An\xe1'\xe1lwo'", u"An\xe1'\xe1lwo'_baa_yin\xedsht'\u012f\u0301", u"T'\xe1\xe1\u0142\xe1h\xe1gi_\xe1t'\xe9ego", u"T'\xe1\xe1\u0142\xe1h\xe1gi_\xe1t'\xe9ego_baa_yin\xedsht'\u012f\u0301", u'Special', u'Media', u"%s_baa_yin\xedsht'\u012f\u0301"],
9.56 +'oc': [u'Discutir', u'Utilisator', u'Discutida_Utilisator', u'Imatge', u'Discutida_Imatge', u'Media\xf2iqui', u'Discutida_Media\xf2iqui', u'Mod\xe8l', u'Discutida_Mod\xe8l', u'Ajuda', u'Discutida_Ajuda', u'Categoria', u'Discutida_Categoria', u'Especial', u'Media', u'Discutida_%s'],
9.57 +'os': [u'\u0414\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u0410\u0440\u0445\u0430\u0439\xe6\u0433', u'\u0410\u0440\u0445\u0430\u0439\xe6\u0434\u0436\u044b_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u041d\u044b\u0432', u'\u041d\u044b\u0432\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'MediaWiki', u'\u0414\u0438\u0441\u043a\u0443\u0441\u0441\u0438_MediaWiki', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0428\u0430\u0431\u043b\u043e\u043d\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\xc6\u0445\u0445\u0443\u044b\u0441', u'\xc6\u0445\u0445\u0443\u044b\u0441\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0439\u044b_\u0442\u044b\u0445\u0445\xe6\u0439_\u0434\u0438\u0441\u043a\u0443\u0441\u0441\u0438', u'\u0421\xe6\u0440\u043c\u0430\u0433\u043e\u043d\u0434', u'Media', u'\u0414\u0438\u0441\u043a\u0443\u0441\u0441\u0438_%s'],
9.58 +'pa': [u'\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a2e\u0a48\u0a02\u0a2c\u0a30', u'\u0a2e\u0a48\u0a02\u0a2c\u0a30_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a24\u0a38\u0a35\u0a40\u0a30', u'\u0a24\u0a38\u0a35\u0a40\u0a30_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a2e\u0a40\u0a21\u0a40\u0a06\u0a35\u0a3f\u0a15\u0a3f', u'\u0a2e\u0a40\u0a21\u0a40\u0a06\u0a35\u0a3f\u0a15\u0a3f_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a28\u0a2e\u0a42\u0a28\u0a3e', u'\u0a28\u0a2e\u0a42\u0a28\u0a3e_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a2e\u0a26\u0a26', u'\u0a2e\u0a26\u0a26_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a38\u0a3c\u0a4d\u0a30\u0a47\u0a23\u0a40', u'\u0a38\u0a3c\u0a4d\u0a30\u0a47\u0a23\u0a40_\u0a1a\u0a30\u0a1a\u0a3e', u'\u0a16\u0a3e\u0a38', u'\u0a2e\u0a40\u0a21\u0a40\u0a06', u'%s_\u0a1a\u0a30\u0a1a\u0a3e'],
9.59 +'pl': [u'Dyskusja', u'U\u017cytkownik', u'Dyskusja_u\u017cytkownika', u'Grafika', u'Dyskusja_grafiki', u'MediaWiki', u'Dyskusja_MediaWiki', u'Szablon', u'Dyskusja_szablonu', u'Pomoc', u'Dyskusja_pomocy', u'Kategoria', u'Dyskusja_kategorii', u'Specjalna', u'Media', u'Dyskusja_%s'],
9.60 +'pt': [u'Discuss\xe3o', u'Utilizador', u'Utilizador_Discuss\xe3o', u'Imagem', u'Imagem_Discuss\xe3o', u'MediaWiki', u'MediaWiki_Discuss\xe3o', u'Predefini\xe7\xe3o', u'Predefini\xe7\xe3o_Discuss\xe3o', u'Ajuda', u'Ajuda_Discuss\xe3o', u'Categoria', u'Categoria_Discuss\xe3o', u'Especial', u'Media', u'%s_Discuss\xe3o'],
9.61 +'ro': [u'Discu\u0163ie', u'Utilizator', u'Discu\u0163ie_Utilizator', u'Imagine', u'Discu\u0163ie_Imagine', u'MediaWiki', u'Discu\u0163ie_MediaWiki', u'Format', u'Discu\u0163ie_Format', u'Ajutor', u'Discu\u0163ie_Ajutor', u'Categorie', u'Discu\u0163ie_Categorie', u'Special', u'Media', u'Discu\u0163ie_%s'],
9.62 +'ru': [u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435', u'\u0423\u0447\u0430\u0441\u0442\u043d\u0438\u043a', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0443\u0447\u0430\u0441\u0442\u043d\u0438\u043a\u0430', u'\u0418\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u0435', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0438\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u044f', u'MediaWiki', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_MediaWiki', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0448\u0430\u0431\u043b\u043e\u043d\u0430', u'\u0421\u043f\u0440\u0430\u0432\u043a\u0430', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u0441\u043f\u0440\u0430\u0432\u043a\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_\u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0438', u'\u0421\u043b\u0443\u0436\u0435\u0431\u043d\u0430\u044f', u'\u041c\u0435\u0434\u0438\u0430', u'\u041e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u0435_{{grammar:genitive|%s}}'],
9.63 +'sk': [u'Diskusia', u'Redaktor', u'Diskusia_s_redaktorom', u'Obr\xe1zok', u'Diskusia_k_obr\xe1zku', u'MediaWiki', u'Diskusia_k_MediaWiki', u'\u0160abl\xf3na', u'Diskusia_k_\u0161abl\xf3ne', u'Pomoc', u'Diskusia_k_pomoci', u'Kateg\xf3ria', u'Diskusia_ku_kateg\xf3rii', u'\u0160peci\xe1lne', u'M\xe9di\xe1', u'Diskusia_k_{{grammar:dat\xedv|%s}}'],
9.64 +'sl': [u'Pogovor', u'Uporabnik', u'Uporabni\u0161ki_pogovor', u'Slika', u'Pogovor_o_sliki', u'MediaWiki', u'Pogovor_o_MediaWiki', u'Predloga', u'Pogovor_o_predlogi', u'Pomo\u010d', u'Pogovor_o_pomo\u010di', u'Kategorija', u'Pogovor_o_kategoriji', u'Posebno', u'Media', u'Pogovor_{{grammar:mestnik|%s}}'],
9.65 +'su': [u'Obrolan', u'Pamak\xe9', u'Obrolan_pamak\xe9', u'Gambar', u'Obrolan_gambar', u'MediaWiki', u'Obrolan_MediaWiki', u'Citakan', u'Obrolan_citakan', u'Pitulung', u'Obrolan_pitulung', u'Kategori', u'Obrolan_kategori', u'Husus', u'M\xe9dia', u'Obrolan_%s'],
9.66 +'sv': [u'Diskussion', u'Anv\xe4ndare', u'Anv\xe4ndardiskussion', u'Bild', u'Bilddiskussion', u'MediaWiki', u'MediaWiki-diskussion', u'Mall', u'Malldiskussion', u'Hj\xe4lp', u'Hj\xe4lpdiskussion', u'Kategori', u'Kategoridiskussion', u'Special', u'Media', u'%sdiskussion'],
9.67 +'ta': [u'\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0baa\u0baf\u0ba9\u0bb0\u0bcd', u'\u0baa\u0baf\u0ba9\u0bb0\u0bcd_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0baa\u0b9f\u0bbf\u0bae\u0bae\u0bcd', u'\u0baa\u0b9f\u0bbf\u0bae\u0baa\u0bcd_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0bae\u0bc0\u0b9f\u0bbf\u0baf\u0bbe\u0bb5\u0bbf\u0b95\u0bcd\u0b95\u0bbf', u'\u0bae\u0bc0\u0b9f\u0bbf\u0baf\u0bbe\u0bb5\u0bbf\u0b95\u0bcd\u0b95\u0bbf_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0bb5\u0bbe\u0bb0\u0bcd\u0baa\u0bcd\u0baa\u0bc1\u0bb0\u0bc1', u'\u0bb5\u0bbe\u0bb0\u0bcd\u0baa\u0bcd\u0baa\u0bc1\u0bb0\u0bc1_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0b89\u0ba4\u0bb5\u0bbf', u'\u0b89\u0ba4\u0bb5\u0bbf_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0baa\u0b95\u0bc1\u0baa\u0bcd\u0baa\u0bc1', u'\u0baa\u0b95\u0bc1\u0baa\u0bcd\u0baa\u0bc1_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1', u'\u0b9a\u0bbf\u0bb1\u0baa\u0bcd\u0baa\u0bc1', u'\u0b8a\u0b9f\u0b95\u0bae\u0bcd', u'%s_\u0baa\u0bc7\u0b9a\u0bcd\u0b9a\u0bc1'],
9.68 +'te': [u'\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c38\u0c2d\u0c4d\u0c2f\u0c41\u0c21\u0c41', u'\u0c38\u0c2d\u0c4d\u0c2f\u0c41\u0c28\u0c3f\u0c2a\u0c48_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2c\u0c4a\u0c2e\u0c4d\u0c2e', u'\u0c2c\u0c4a\u0c2e\u0c4d\u0c2e\u0c2a\u0c48_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2e\u0c40\u0c21\u0c3f\u0c2f\u0c3e\u0c35\u0c3f\u0c15\u0c40', u'\u0c2e\u0c40\u0c21\u0c3f\u0c2f\u0c3e\u0c35\u0c3f\u0c15\u0c40_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2e\u0c42\u0c38', u'\u0c2e\u0c42\u0c38_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c38\u0c39\u0c3e\u0c2f\u0c2e\u0c41', u'\u0c38\u0c39\u0c3e\u0c2f\u0c2e\u0c41_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c35\u0c30\u0c4d\u0c17\u0c02', u'\u0c35\u0c30\u0c4d\u0c17\u0c02_\u0c1a\u0c30\u0c4d\u0c1a', u'\u0c2a\u0c4d\u0c30\u0c24\u0c4d\u0c2f\u0c47\u0c15', u'\u0c2e\u0c40\u0c21\u0c3f\u0c2f\u0c3e', u'%s_\u0c1a\u0c30\u0c4d\u0c1a'],
9.69 +'tg': [u'\u0411\u0430\u04b3\u0441', u'\u041a\u043e\u0440\u0431\u0430\u0440', u'\u0411\u0430\u04b3\u0441\u0438_\u043a\u043e\u0440\u0431\u0430\u0440', u'\u0410\u043a\u0441', u'\u0411\u0430\u04b3\u0441\u0438_\u0430\u043a\u0441', u'\u041c\u0435\u0434\u0438\u0430\u0432\u0438\u043a\u0438', u'\u0411\u0430\u04b3\u0441\u0438_\u043c\u0435\u0434\u0438\u0430\u0432\u0438\u043a\u0438', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u0411\u0430\u04b3\u0441\u0438_\u0448\u0430\u0431\u043b\u043e\u043d', u'\u0420\u043e\u04b3\u043d\u0430\u043c\u043e', u'\u0411\u0430\u04b3\u0441\u0438_\u0440\u043e\u04b3\u043d\u0430\u043c\u043e', u'\u0413\u0443\u0440\u04ef\u04b3', u'\u0411\u0430\u04b3\u0441\u0438_\u0433\u0443\u0440\u04ef\u04b3', u'\u0412\u0438\u0436\u0430', u'\u041c\u0435\u0434\u0438\u0430', u'\u0411\u0430\u04b3\u0441\u0438_%s'],
9.70 +'th': [u'\u0e1e\u0e39\u0e14\u0e04\u0e38\u0e22', u'\u0e1c\u0e39\u0e49\u0e43\u0e0a\u0e49', u'\u0e04\u0e38\u0e22\u0e01\u0e31\u0e1a\u0e1c\u0e39\u0e49\u0e43\u0e0a\u0e49', u'\u0e20\u0e32\u0e1e', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e20\u0e32\u0e1e', u'\u0e21\u0e35\u0e40\u0e14\u0e35\u0e22\u0e27\u0e34\u0e01\u0e34', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e21\u0e35\u0e40\u0e14\u0e35\u0e22\u0e27\u0e34\u0e01\u0e34', u'\u0e41\u0e21\u0e48\u0e41\u0e1a\u0e1a', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e41\u0e21\u0e48\u0e41\u0e1a\u0e1a', u'\u0e27\u0e34\u0e18\u0e35\u0e43\u0e0a\u0e49', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e27\u0e34\u0e18\u0e35\u0e43\u0e0a\u0e49', u'\u0e2b\u0e21\u0e27\u0e14\u0e2b\u0e21\u0e39\u0e48', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e27\u0e14\u0e2b\u0e21\u0e39\u0e48', u'\u0e1e\u0e34\u0e40\u0e28\u0e29', u'\u0e2a\u0e37\u0e48\u0e2d', u'\u0e04\u0e38\u0e22\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07%s'],
9.71 +'tr': [u'Tart\u0131\u015fma', u'Kullan\u0131c\u0131', u'Kullan\u0131c\u0131_mesaj', u'Resim', u'Resim_tart\u0131\u015fma', u'MedyaViki', u'MedyaViki_tart\u0131\u015fma', u'\u015eablon', u'\u015eablon_tart\u0131\u015fma', u'Yard\u0131m', u'Yard\u0131m_tart\u0131\u015fma', u'Kategori', u'Kategori_tart\u0131\u015fma', u'\xd6zel', u'Media', u'%s_tart\u0131\u015fma'],
9.72 +'tt': [u'B\xe4x\xe4s', u'\xc4\u011fz\xe4', u'\xc4\u011fz\xe4_b\xe4x\xe4se', u'R\xe4sem', u'R\xe4sem_b\xe4x\xe4se', u'MediaWiki', u'MediaWiki_b\xe4x\xe4se', u'\xdcrn\xe4k', u'\xdcrn\xe4k_b\xe4x\xe4se', u'Y\xe4rd\xe4m', u'Y\xe4rd\xe4m_b\xe4x\xe4se', u'T\xf6rkem', u'T\xf6rkem_b\xe4x\xe4se', u'Maxsus', u'Media', u'%s_b\xe4x\xe4se'],
9.73 +'uk': [u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f', u'\u041a\u043e\u0440\u0438\u0441\u0442\u0443\u0432\u0430\u0447', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u043a\u043e\u0440\u0438\u0441\u0442\u0443\u0432\u0430\u0447\u0430', u'\u0417\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u043d\u044f', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u043d\u044f', u'MediaWiki', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_MediaWiki', u'\u0428\u0430\u0431\u043b\u043e\u043d', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u0448\u0430\u0431\u043b\u043e\u043d\u0443', u'\u0414\u043e\u0432\u0456\u0434\u043a\u0430', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u0434\u043e\u0432\u0456\u0434\u043a\u0438', u'\u041a\u0430\u0442\u0435\u0433\u043e\u0440\u0456\u044f', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_\u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0456\u0457', u'\u0421\u043f\u0435\u0446\u0456\u0430\u043b\u044c\u043d\u0456', u'\u041c\u0435\u0434\u0456\u0430', u'\u041e\u0431\u0433\u043e\u0432\u043e\u0440\u0435\u043d\u043d\u044f_%s'],
9.74 +'ur': [u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644', u'\u0635\u0627\u0631\u0641', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0635\u0627\u0631\u0641', u'\u062a\u0635\u0648\u06cc\u0631', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u062a\u0635\u0648\u06cc\u0631', u'\u0645\u06cc\u0688\u06cc\u0627\u0648\u06a9\u06cc', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0645\u06cc\u0688\u06cc\u0627\u0648\u06a9\u06cc', u'\u0633\u0627\u0646\u0686\u06c1', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0633\u0627\u0646\u0686\u06c1', u'\u0645\u0639\u0627\u0648\u0646\u062a', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0645\u0639\u0627\u0648\u0646\u062a', u'\u0632\u0645\u0631\u06c1', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_\u0632\u0645\u0631\u06c1', u'\u062e\u0627\u0635', u'\u0632\u0631\u06cc\u0639\u06c1', u'\u062a\u0628\u0627\u062f\u0644\u06c2_\u062e\u06cc\u0627\u0644_%s'],
9.75 +'vi': [u'Th\u1ea3o_lu\u1eadn', u'Th\xe0nh_vi\xean', u'Th\u1ea3o_lu\u1eadn_Th\xe0nh_vi\xean', u'H\xecnh', u'Th\u1ea3o_lu\u1eadn_H\xecnh', u'MediaWiki', u'Th\u1ea3o_lu\u1eadn_MediaWiki', u'Ti\xeau_b\u1ea3n', u'Th\u1ea3o_lu\u1eadn_Ti\xeau_b\u1ea3n', u'Tr\u1ee3_gi\xfap', u'Th\u1ea3o_lu\u1eadn_Tr\u1ee3_gi\xfap', u'Th\u1ec3_lo\u1ea1i', u'Th\u1ea3o_lu\u1eadn_Th\u1ec3_lo\u1ea1i', u'\u0110\u1eb7c_bi\u1ec7t', u'Ph\u01b0\u01a1ng_ti\u1ec7n', u'Th\u1ea3o_lu\u1eadn_%s'],
9.76 +'wa': [u'Copene', u'Uzeu', u'Uzeu_copene', u'Im\xe5dje', u'Im\xe5dje_copene', u'MediaWiki', u'MediaWiki_copene', u'Modele', u'Modele_copene', u'Aidance', u'Aidance_copene', u'Categoreye', u'Categoreye_copene', u'Sipeci\xe5s', u'Media', u'%s_copene'],
9.77 +'yi': [u'\u05e8\u05e2\u05d3\u05df', u'\u05d1\u05d0\u05b7\u05e0\u05d9\u05e6\u05e2\u05e8', u'\u05d1\u05d0\u05b7\u05e0\u05d9\u05e6\u05e2\u05e8_\u05e8\u05e2\u05d3\u05df', u'\u05d1\u05d9\u05dc\u05d3', u'\u05d1\u05d9\u05dc\u05d3_\u05e8\u05e2\u05d3\u05df', u'\u05de\u05e2\u05d3\u05d9\u05e2\u05f0\u05d9\u05e7\u05d9', u'\u05de\u05e2\u05d3\u05d9\u05e2\u05f0\u05d9\u05e7\u05d9_\u05e8\u05e2\u05d3\u05df', u'\u05de\u05d5\u05e1\u05d8\u05e2\u05e8', u'\u05de\u05d5\u05e1\u05d8\u05e2\u05e8_\u05e8\u05e2\u05d3\u05df', u'\u05d4\u05d9\u05dc\u05e3', u'\u05d4\u05d9\u05dc\u05e3_\u05e8\u05e2\u05d3\u05df', u'\u05e7\u05d0\u05b7\u05d8\u05e2\u05d2\u05d0\u05b8\u05e8\u05d9\u05e2', u'\u05e7\u05d0\u05b7\u05d8\u05e2\u05d2\u05d0\u05b8\u05e8\u05d9\u05e2_\u05e8\u05e2\u05d3\u05df', u'\u05d1\u05d0\u05b7\u05d6\u05d5\u05e0\u05d3\u05e2\u05e8', u'\u05de\u05e2\u05d3\u05d9\u05e2', u'%s_\u05e8\u05e2\u05d3\u05df'],
9.78 +}
9.79 +
10.1 --- a/mwlib/options.py Thu Jul 03 17:11:35 2008 +0200
10.2 +++ b/mwlib/options.py Thu Jul 03 17:11:44 2008 +0200
10.3 @@ -79,5 +79,6 @@
10.4 self.options.collectionpage,
10.5 ))
10.6 self.metabook = metabook.parse_collection_page(wikitext)
10.7 + env.metabook = self.metabook
10.8 return env
10.9
11.1 --- a/mwlib/parser.py Thu Jul 03 17:11:35 2008 +0200
11.2 +++ b/mwlib/parser.py Thu Jul 03 17:11:44 2008 +0200
11.3 @@ -9,6 +9,8 @@
11.4
11.5 from mwlib.scanner import tokenize, TagToken, EndTagToken
11.6 from mwlib.log import Log
11.7 +from mwlib.namespace import namespace_maps, interwiki_map
11.8 +from mwlib.lang import languages
11.9
11.10 log = Log("parser")
11.11
11.12 @@ -193,82 +195,165 @@
11.13
11.14 class Link(Node):
11.15 target = None
11.16 - specialPrefixes = set(["wikipedia", "wiktionary", "wikibooks", "wikisource",
11.17 - "wikiquote", "meta", "talk",
11.18 - "commons", "wikinews", "template", "wikitravel", "help", "vorlage"])
11.19 - from mwlib.lang import languages
11.20 + from mwlib.namespace import NS_MAIN, NS_CATEGORY, NS_IMAGE
11.21 +
11.22 colon = False
11.23
11.24 def hasContent(self):
11.25 if self.target:
11.26 return True
11.27 return False
11.28 +
11.29 + @classmethod
11.30 + def _buildSpecializeMap(cls, namespaces, interwikis, langs):
11.31 + """
11.32 + Returns a dict mapping lower-cased prefixes (namespace names, interwiki
11.33 + names, languages) to a tuple of form (link_class, namespace_value).
11.34 + """
11.35 + res = {}
11.36 + for name, num in namespaces.iteritems():
11.37 + name = name.lower()
11.38 + if num == cls.NS_CATEGORY:
11.39 + res[name] = (CategoryLink, num)
11.40 + elif num == cls.NS_IMAGE:
11.41 + res[name] = (ImageLink, num)
11.42 + else:
11.43 + res[name] = (NamespaceLink, num)
11.44 +
11.45 + for name, target in interwikis.iteritems():  # overwrites a namespace entry with the same key
11.46 + res[name.lower()] = (InterwikiLink, target)
11.47 +
11.48 + for lang in langs:  # overwrites namespace/interwiki entries with the same key
11.49 + res[lang.lower()] = (LangLink, lang)
11.50 +
11.51 + return res
11.52
11.53 + @classmethod
11.54 + def _setSpecializeMap(cls, nsMap='default'):  # nsMap is a key of namespace_maps; result is stored on the class
11.55 + cls._specializeMap = cls._buildSpecializeMap(
11.56 + namespace_maps[nsMap], interwiki_map, languages)
11.57 +
11.58 def _specialize(self):
11.59 + """
11.60 + Handles different forms of link, e.g.:
11.61 + - [[Foo]]
11.62 + - [[Foo|Bar]]
11.63 + - [[Category:Foo]]
11.64 + - [[:Category:Foo]]
11.65 + """
11.66 +
11.67 if not self.children:
11.68 return
11.69
11.70 if type(self.children[0]) != Text:
11.71 return
11.72
11.73 - self.target = target = self.children[0].caption.strip()
11.74 + # Handle [[Foo|Bar]]
11.75 + full_target = self.children[0].caption.strip()
11.76 del self.children[0]
11.77 if self.children and self.children[0] == Control("|"):
11.78 del self.children[0]
11.79 +
11.80 + # Mark [[:Category:Foo]]. See below
11.81 + if full_target.startswith(':'):
11.82 + self.colon = True
11.83 + full_target = full_target[1:]
11.84 + self.full_target = full_target
11.85
11.86 - pic = self.target
11.87 - if pic.startswith(':'):
11.88 - self.colon = True
11.89 -
11.90 -
11.91 -
11.92 - # pic == "Bild:Wappen_von_Budenheim.png"
11.93 -
11.94 - pic = pic.strip(': ')
11.95 - if ':' not in pic:
11.96 - return
11.97 -
11.98 - linktype, pic = pic.split(':', 1)
11.99 - linktype = linktype.lower().strip(" :")
11.100 -
11.101 - if linktype in ("category", "kategorie"):
11.102 - self.__class__ = CategoryLink
11.103 - self.target = pic.strip()
11.104 + try:
11.105 + ns, title = full_target.split(':', 1)
11.106 + except ValueError:
11.107 + self.namespace = self.NS_MAIN
11.108 + self.target = full_target
11.109 + self.__class__ = ArticleLink
11.110 return
11.111
11.112 - if linktype in self.specialPrefixes:
11.113 - self.__class__ = SpecialLink
11.114 - self.target = pic.strip()
11.115 - self.ns = linktype
11.116 + (self.__class__, self.namespace) = (
11.117 + self._specializeMap.get(ns.lower(), (ArticleLink, self.NS_MAIN)))
11.118
11.119 + if len(ns) == 2:
11.120 + # Assume this is an unlisted language
11.121 + self.__class__ = LangLink
11.122 + self.namespace = ns.lower()
11.123 +
11.124 + if self.colon and self.namespace != self.NS_MAIN:
11.125 + # [[:Category:Foo]] should not be a category link
11.126 + self.__class__ = NamespaceLink
11.127 +
11.128 + if self.namespace == self.NS_MAIN:
11.129 + # e.g. [[Blah: Foo]] is an ordinary article with a colon
11.130 + self.target = full_target
11.131 + else:
11.132 + self.target = title
11.133 +
11.134 + if self.__class__ == ImageLink:
11.135 + # Handle images. First ensure they are syntactically sound.
11.136 +
11.137 + try:
11.138 + prefix, suffix = title.rsplit('.', 1)
11.139 + if suffix.lower() in ['jpg', 'jpeg', 'gif', 'png', 'svg']:
11.140 + self._readArgs() # calls ImageLink._readArgs()
11.141 + return
11.142 + except ValueError:
11.143 + pass
11.144 + # We can't handle this as an image, so default:
11.145 + self.__class__ = NamespaceLink
11.146 +
11.147 +
11.148 + capitalizeTarget = False # Wiki-dependent setting, e.g. Wikipedia => True
11.149 +
11.150 + _SPACE_RE = re.compile('[_\s]+')
11.151 + def _normalizeTarget(self):
11.152 + """
11.153 + Normalizes the format of the target with regard to whitespace and
11.154 + capitalization (depending on capitalizeTarget setting).
11.155 + """
11.156 +
11.157 + if not self.target:
11.158 return
11.159
11.160 - if linktype in self.languages:
11.161 - self.__class__ = LangLink
11.162 - return
11.163 -
11.164 -
11.165 - if linktype not in ("bild", "image", "imagen"):
11.166 - # assume a LangLink
11.167 - log.info("Unknown linktype:", repr(linktype))
11.168 - if len(linktype)==2:
11.169 - self.__class__ = LangLink
11.170 - return
11.171 -
11.172 -
11.173 - # pic == "Wappen_von_Budenheim.png"
11.174 -
11.175 - try:
11.176 - prefix, suffix = pic.rsplit('.', 1)
11.177 - except ValueError:
11.178 - return
11.179 + # Strictly, the target should be run through urllib.unquote() first, but in
11.180 + # practice percent-encoded targets are probably rare enough to ignore.
11.181
11.182 - if suffix.lower() in ['jpg', 'jpeg', 'gif', 'png', 'svg']:
11.183 - self.__class__ = ImageLink
11.184 - self.target = pic.strip()
11.185 + # [[__init__]] -> [[init]]
11.186 + self.target = self._SPACE_RE.sub(' ', self.target).strip()
11.187 + if self.capitalizeTarget:
11.188 + self.target = self.target[:1].upper() + self.target[1:]
11.189
11.190
11.191 +# Link forms:
11.192
11.193 +class ArticleLink(Link):
11.194 + pass
11.195 +
11.196 +class SpecialLink(Link):
11.197 + pass
11.198 +
11.199 +class NamespaceLink(SpecialLink):
11.200 + pass
11.201 +
11.202 +class InterwikiLink(SpecialLink):
11.203 + pass
11.204 +
11.205 +# Non-links with same syntax:
11.206 +
11.207 +class LangLink(Link):
11.208 + pass
11.209 +
11.210 +class CategoryLink(Link):
11.211 + pass
11.212 +
11.213 +class ImageLink(Link):
11.214 + target = None
11.215 + width = None
11.216 + height = None
11.217 + align = ''
11.218 + thumb = False
11.219 +
11.220 + def isInline(self):
11.221 + return not bool(self.align or self.thumb)
11.222 +
11.223 + def _readArgs(self):
11.224 idx = 0
11.225 last = []
11.226
11.227 @@ -328,25 +413,8 @@
11.228
11.229 if not self.children:
11.230 self.children = last
11.231 -
11.232 -class ImageLink(Link):
11.233 - target = None
11.234 - width = None
11.235 - height = None
11.236 - align = ''
11.237 - thumb = False
11.238 -
11.239 - def isInline(self):
11.240 - return not bool(self.align or self.thumb)
11.241 -
11.242 -class LangLink(Link):
11.243 - pass
11.244
11.245 -class CategoryLink(Link):
11.246 - pass
11.247 -
11.248 -class SpecialLink(Link):
11.249 - pass
11.250 +Link._setSpecializeMap('default') # initialise the Link class
11.251
11.252
11.253 class Text(Node):
11.254 @@ -365,10 +433,10 @@
11.255 class Control(Text):
11.256 pass
11.257
11.258 -def _parseAtomFromString(s):
11.259 +def _parseAtomFromString(s, lang=None):
11.260 from mwlib import scanner
11.261 tokens = scanner.tokenize(s)
11.262 - p=Parser(tokens)
11.263 + p=Parser(tokens, lang=lang)
11.264 try:
11.265 return p.parseAtom()
11.266 except Exception, err:
11.267 @@ -377,10 +445,10 @@
11.268
11.269
11.270
11.271 -def parse_fields_in_imagemap(imap):
11.272 +def parse_fields_in_imagemap(imap, lang=None):
11.273
11.274 if imap.image:
11.275 - imap.imagelink = _parseAtomFromString(u'[['+imap.image+']]')
11.276 + imap.imagelink = _parseAtomFromString(u'[['+imap.image+']]', lang=lang)
11.277 if not isinstance(imap.imagelink, ImageLink):
11.278 imap.imagelink = None
11.279
11.280 @@ -397,13 +465,22 @@
11.281 _ALPHA_RE = re.compile(r'[^\W\d_]+', re.UNICODE) # Matches alpha strings
11.282
11.283 class Parser(object):
11.284 - def __init__(self, tokens, name=''):
11.285 + def __init__(self, tokens, name='', lang=None):
11.286 self.tokens = tokens
11.287 + self.lang = lang
11.288 self.pos = 0
11.289 self.name = name
11.290 self.lastpos = 0
11.291 self.count = 0
11.292 -
11.293 +
11.294 + if lang:
11.295 + nsMap = '%s+en_mw' % lang
11.296 + if nsMap not in namespace_maps:
11.297 + nsMap = 'default'
11.298 + else:
11.299 + nsMap = 'default'
11.300 + Link._setSpecializeMap(nsMap)
11.301 +
11.302 from mwlib import tagext
11.303 self.tagextensions = tagext.default_registry
11.304
11.305 @@ -548,7 +625,7 @@
11.306
11.307 if not obj.children and obj.target:
11.308 # [[a]] -> [[a|a]]
11.309 - obj.append(Text(obj.target))
11.310 + obj.append(Text(obj.full_target))
11.311
11.312 if isinstance(obj, ImageLink):
11.313 return obj
11.314 @@ -559,6 +636,8 @@
11.315 # [[a|a]]b -> [[a|ab]]
11.316 obj.append(Text(m.group(0)), True)
11.317 self.tokens[self.pos] = ('TEXT', self.token[1][m.end():])
11.318 +
11.319 + obj._normalizeTarget()
11.320
11.321 return obj
11.322
11.323 @@ -668,7 +747,7 @@
11.324 continue
11.325
11.326 # either image link or text inside
11.327 - n=_parseAtomFromString(u'[['+x+']]')
11.328 + n=_parseAtomFromString(u'[['+x+']]', lang=self.lang)
11.329
11.330 if isinstance(n, ImageLink):
11.331 children.append(n)
11.332 @@ -684,7 +763,7 @@
11.333 txt = "".join(x.caption for x in node.find(Text))
11.334 from mwlib import imgmap
11.335 node.imagemap = imgmap.ImageMapFromString(txt)
11.336 - parse_fields_in_imagemap(node.imagemap)
11.337 + parse_fields_in_imagemap(node.imagemap, lang=self.lang)
11.338
11.339 #print node.imagemap
11.340 return node
12.1 --- a/mwlib/uparser.py Thu Jul 03 17:11:35 2008 +0200
12.2 +++ b/mwlib/uparser.py Thu Jul 03 17:11:44 2008 +0200
12.3 @@ -76,7 +76,7 @@
12.4
12.5 postprocessors = [removeBoilerplate, simplify, fixlitags]
12.6
12.7 -def parseString(title=None, raw=None, wikidb=None, revision=None):
12.8 +def parseString(title=None, raw=None, wikidb=None, revision=None, lang=None):
12.9 """parse article with title from raw mediawiki text"""
12.10 assert title is not None
12.11
12.12 @@ -86,12 +86,16 @@
12.13 if wikidb:
12.14 te = expander.Expander(raw, pagename=title, wikidb=wikidb)
12.15 input = te.expandTemplates()
12.16 + if lang is None and hasattr(wikidb, 'getSource'):
12.17 + src = wikidb.getSource()
12.18 + if src:
12.19 + lang = src.get('language')
12.20 else:
12.21 input = raw
12.22 -
12.23 +
12.24 tokens = scanner.tokenize(input, title)
12.25
12.26 - a = parser.Parser(tokens, title).parse()
12.27 + a = parser.Parser(tokens, title, lang=lang).parse()
12.28 a.caption = title
12.29 for x in postprocessors:
12.30 x(a)
13.1 --- a/mwlib/wiki.py Thu Jul 03 17:11:35 2008 +0200
13.2 +++ b/mwlib/wiki.py Thu Jul 03 17:11:44 2008 +0200
13.3 @@ -135,8 +135,8 @@
13.4 def get_source(self):
13.5 if 'source' in self.metabook:
13.6 return self.metabook['source']
13.7 - if hasattr(self.wiki, 'getMetaData'):
13.8 - return self.wiki.getMetaData()
13.9 + if hasattr(self.wiki, 'getSource'):
13.10 + return self.wiki.getSource()
13.11 return metabook.make_source(
13.12 name=self.configparser.get('wiki', 'name'),
13.13 url=self.configparser.get('wiki', 'url'),
14.1 --- a/mwlib/zipwiki.py Thu Jul 03 17:11:35 2008 +0200
14.2 +++ b/mwlib/zipwiki.py Thu Jul 03 17:11:44 2008 +0200
14.3 @@ -36,6 +36,9 @@
14.4 pass
14.5 return None
14.6
14.7 + def getSource(self):
14.8 + return self.metabook.get('source')
14.9 +
14.10 def getRawArticle(self, title, revision=None):
14.11 article = self._getArticle(title, revision=revision)
14.12 if article:
15.1 --- a/tests/test_parser.py Thu Jul 03 17:11:35 2008 +0200
15.2 +++ b/tests/test_parser.py Thu Jul 03 17:11:44 2008 +0200
15.3 @@ -610,3 +610,59 @@
15.4 assert u'<nosuchtag>' in txt, 'opening tag missing in asText()'
15.5 assert u'</nosuchtag>' in txt, 'closing tag missing in asText()'
15.6
15.7 +# Test varieties of link
15.8 +
15.9 +def test_plain_link():
15.10 + r=parse("[[bla]]").find(parser.ArticleLink)[0]
15.11 + assert r.target=='bla'
15.12 + assert r.children[0].caption == 'bla'
15.13 +
15.14 +def test_piped_link():
15.15 + r=parse("[[bla|blubb]]").find(parser.ArticleLink)[0]
15.16 + assert r.target=='bla'
15.17 + assert r.children[0].caption == 'blubb'
15.18 +
15.19 +def test_category_link():
15.20 + r=parse("[[category:bla]]").find(parser.CategoryLink)[0]
15.21 + assert r.target=='bla'
15.22 + assert r.namespace == 14
15.23 +
15.24 +def test_category_colon_link():
15.25 + r=parse("[[:category:bla]]").find(parser.SpecialLink)[0]
15.26 + assert r.target=='bla'
15.27 + assert r.namespace == 14
15.28 + assert not isinstance(r, parser.CategoryLink)
15.29 +
15.30 +def test_image_colon_link():
15.31 + r=parse("[[:image:bla.jpg]]").find(parser.SpecialLink)[0]
15.32 + assert r.target=='bla.jpg'
15.33 + assert r.namespace == 6
15.34 + assert not isinstance(r, parser.ImageLink)
15.35 +
15.36 +def test_interwiki_link():
15.37 + r=parse("[[wict:bla]]").find(parser.SpecialLink)[0]
15.38 + assert r.target=='bla'
15.39 + assert r.namespace == 'wiktionary'
15.40 +
15.41 +def test_language_link():
15.42 + r=parse("[[es:bla]]").find(parser.LangLink)[0]
15.43 + assert r.target=='bla'
15.44 + assert r.namespace == 'es'
15.45 +
15.46 +def test_long_language_link():
15.47 + r=parse("[[csb:bla]]").find(parser.LangLink)[0]
15.48 + assert r.target=='bla'
15.49 + assert r.namespace == 'csb'
15.50 +
15.51 +def test_normalize():
15.52 + r=parse("[[MediaWiki:__bla_ _]]").find(parser.LangLink)[0]
15.53 + assert r.target=='bla'
15.54 + assert r.namespace == 8
15.55 +
15.56 +def test_normalize_with_caps():
15.57 + parser.Link.capitalizeTarget = True
15.58 + r=parse("[[MediaWiki:__bla_ _ ]]").find(parser.LangLink)[0]
15.59 + parser.Link.capitalizeTarget = False
15.60 + assert r.target=='Bla'
15.61 + assert r.namespace == 8
15.62 + assert r.children[0].caption == 'MediaWiki:__bla_ _'