| author | Thibaut Horel <thibaut.horel@gmail.com> | 2016-02-04 19:46:04 -0500 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2016-02-04 19:46:04 -0500 |
| commit | 871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7 (patch) | |
| tree | 99bce3e74cbcff075dcb6bceacd0f2e1133bef4d /BibTeX.py | |
| parent | fd20589a448cd19d036f18cabb1663c33a24375d (diff) | |
| download | anonbib-871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7.tar.gz | |
Start cleaning: PEP8 and split the BibTeX.py monster
Diffstat (limited to 'BibTeX.py')
| -rw-r--r-- | BibTeX.py | 755 |
1 file changed, 18 insertions(+), 737 deletions(-)
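Most of the 737 deleted lines are code that moves out of BibTeX.py into two new modules; the `+` lines in the first hunk below show the replacement imports. As a quick orientation, this is roughly what the top of BibTeX.py looks like after the commit — only the import lines are confirmed by the diff, everything else about the new layout is inferred:

```python
# Top of BibTeX.py after this commit, per the "+" lines in the hunk below.
import copy
import config

# Entry objects and author-table construction now come from entry.py ...
from entry import BibTeXEntry, buildAuthorTable
# ... and the small string/path helpers from utils.py.
from utils import txtize, url_untranslate, smartJoin
```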
@@ -14,63 +14,43 @@
 import copy
 import config
-import rank
+from entry import BibTeXEntry, buildAuthorTable
+from utils import txtize, url_untranslate, smartJoin
 
-__all__ = [ 'ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize',
-            'ParsedAuthor', 'FileIter', 'Parser', 'parseFile',
-            'splitBibTeXEntriesBy', 'sortBibTexEntriesBy', ]
+__all__ = ['ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize',
+           'ParsedAuthor', 'FileIter', 'Parser', 'parseFile',
+           'splitEntriesBy', 'sortEntriesBy']
 
 # List: must map from month number to month name.
-MONTHS = [ None,
-           "January", "February", "March", "April", "May", "June",
-           "July", "August", "September", "October", "November", "December"]
+MONTHS = [None, "January", "February", "March", "April", "May", "June",
+          "July", "August", "September", "October", "November", "December"]
+
+
 
-# Fields that we only care about for making web pages (BibTeX doesn't
-# recognize them.)
-WWW_FIELDS = [ 'www_section', 'www_important', 'www_remarks',
-               'www_abstract_url', 'www_html_url', 'www_pdf_url', 'www_ps_url',
-               'www_txt_url', 'www_ps_gz_url', 'www_amazon_url',
-               'www_excerpt_url', 'www_publisher_url',
-               'www_cache_section', 'www_tags' ]
 
-def url_untranslate(s):
-    """Change a BibTeX key into a string suitable for use in a URL."""
-    s = re.sub(r'([%<>`#, &_\';])',
-               lambda m: "_%02x"%ord(m.group(1)),
-               s)
-    s = s.replace("/",":")
-    return s
 
 class ParseError(Exception):
     """Raised on invalid BibTeX"""
     pass
 
-def smartJoin(*lst):
-    """Equivalent to os.path.join, but handle "." and ".." entries a bit better.
-    """
-    lst = [ item for item in lst if item != "." ]
-    idx = 0
-    while idx < len(lst):
-        if idx > 0 and lst[idx] == "..":
-            del lst[idx]
-        else:
-            idx += 1
-    return os.path.join(*lst)
+
 
 class BibTeX:
     """A parsed BibTeX file"""
     def __init__(self):
-        self.entries = [] # List of BibTeXEntry
-        self.byKey = {} # Map from BibTeX key to BibTeX entry.
+        self.entries = []  # List of BibTeXEntry
+        self.byKey = {}  # Map from BibTeX key to BibTeX entry.
+
     def addEntry(self, ent):
         """Add a BibTeX entry to this file."""
         k = ent.key
         if self.byKey.get(ent.key.lower()):
-            print >> sys.stderr, "Already have an entry named %s"%k
+            print >> sys.stderr, "Already have an entry named %s" % k
             return
         self.entries.append(ent)
         self.byKey[ent.key.lower()] = ent
+
     def resolve(self):
         """Validate all entries in this file, and resolve cross-references"""
         seen = {}
@@ -80,7 +60,7 @@ class BibTeX:
             try:
                 cr = self.byKey[ent['crossref'].lower()]
             except KeyError:
-                print "No such crossref: %s"% ent['crossref']
+                print "No such crossref: %s" % ent['crossref']
                 break
             if seen.get(cr.key):
                 raise ParseError("Circular crossref at %s" % ent.key)
@@ -88,7 +68,7 @@ class BibTeX:
             del ent.entries['crossref']
 
             if cr.entryLine < ent.entryLine:
-                print "Warning: crossref %s used after declaration"%cr.key
+                print "Warning: crossref %s used after declaration" % cr.key
 
             for k in cr.entries.keys():
                 if ent.entries.has_key(k):
@@ -113,47 +93,7 @@ class BibTeX:
             newEntries.append(ent)
         self.entries = newEntries
 
-def buildAuthorTable(entries):
-    """Given a list of BibTeXEntry, return a map from parsed author name to
-    parsed canonical name.
-    """
    [the remaining ~40 removed lines of this hunk are the author-collapsing
    logic built around config.COLLAPSE_AUTHORS and ParsedAuthor.collapsesTo(),
    plus two disabled debug blocks; buildAuthorTable is now imported from entry]
 
 def splitEntriesBy(entries, field):
     """Take a list of BibTeX entries and the name of a bibtex field; return
@@ -281,570 +221,9 @@ def sortEntriesByDate(entries):
     return [ t[2] for t in tmp ]
 
    [~560 removed lines: the DISPLAYED_FIELDS list; the whole BibTeXEntry class
    (field access, getURL, format, resolve, isImportant, check and _check,
    biblio_to_html, to_html); unTeXescapeURL and TeXescapeURL; the RE_LONE_AMP,
    RE_LONE_I, RE_ACCENT, RE_LIGATURE, RE_TEX_CMD and RE_PAGE_SPAN patterns with
    the ACCENT_MAP, UNICODE_MAP and HTML_LIGATURE_MAP tables; htmlize,
    author_url and txtize; PROCEEDINGS_RE; and the whole ParsedAuthor class
    (collapsesTo, getHomepage, getSortingName, getSectionName, htmlizeWithLink)]
 
 def _split(s,w=79,indent=8):
     r = []
@@ -886,105 +265,7 @@ class FileIter:
         return self._next()
 
    [~100 removed lines: parseAuthor and _parseAuthor; the ALLCHARS,
    PRINTINGCHARS, LC_CHARS and SV_DELCHARS character tables; RE_ESCAPED;
    and split_von]
 
 class Parser:
@@ -1016,7 +297,7 @@ class Parser:
     def _parseKey(self, line):
         it = self.fileiter
-        line = _advance(it,line)
+        line = _advance(it, line)
         m = KEY_RE.match(line)
         if not m:
             raise ParseError("Expected key at line %s"%self.fileiter.lineno)
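The new utils module itself is outside this diff (the diffstat is limited to BibTeX.py), so the following is only a hedged sketch of what utils.py presumably contains: the module name comes from the new import line, and the two self-contained helper bodies are copied from the deletions above, reformatted to the PEP 8 style the commit message calls for. txtize, which is also imported from utils, depends on the RE_* tables removed above and is therefore not reproduced here.

```python
# utils.py -- assumed new home of the helpers removed from BibTeX.py above.
import os
import re


def url_untranslate(s):
    """Change a BibTeX key into a string suitable for use in a URL."""
    s = re.sub(r'([%<>`#, &_\';])',
               lambda m: "_%02x" % ord(m.group(1)),
               s)
    s = s.replace("/", ":")
    return s


def smartJoin(*lst):
    """Like os.path.join, but handle "." and ".." entries a bit better."""
    lst = [item for item in lst if item != "."]
    idx = 0
    while idx < len(lst):
        if idx > 0 and lst[idx] == "..":
            del lst[idx]  # drop the ".." component, as the original code did
        else:
            idx += 1
    return os.path.join(*lst)
```

As an illustrative example, `url_untranslate("freehaven/berk,2003")` yields `"freehaven:berk_2c2003"`, which is safe to embed in the generated bibliography URLs.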

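entry.py is likewise not part of this diff. Judging by the new import line and the code deleted above, it now carries the BibTeXEntry class and buildAuthorTable; the skeleton below is reconstructed from the removed definitions purely as an interface sketch, not the actual contents of the new file.

```python
# entry.py -- assumed destination of BibTeXEntry and buildAuthorTable;
# interface reconstructed from the code removed from BibTeX.py above.


class BibTeXEntry:
    """A single BibTeX entry."""

    def __init__(self, type, key, entries):
        self.type = type        # What kind of entry is it? (@book, @article, ...)
        self.key = key          # The entry's BibTeX key
        self.entries = entries  # Map from field name to value
        self.entryLine = 0      # Line on which the entry was defined

    def get(self, k, v=None):
        return self.entries.get(k, v)

    def __getitem__(self, k):
        return self.entries[k]

    def __setitem__(self, k, v):
        self.entries[k] = v


def buildAuthorTable(entries):
    """Given a list of BibTeXEntry, return a map from parsed author name
    to parsed canonical name."""
    raise NotImplementedError  # body presumably moved from BibTeX.py (see above)
```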