diff options
Diffstat (limited to 'BibTeX.py')
| -rw-r--r-- | BibTeX.py | 153 |
1 files changed, 5 insertions, 148 deletions
@@ -9,24 +9,12 @@ import cStringIO import re import sys -import os -import copy import config -from entry import BibTeXEntry, buildAuthorTable -from utils import txtize, url_untranslate, smartJoin - -__all__ = ['ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize', - 'ParsedAuthor', 'FileIter', 'Parser', 'parseFile', - 'splitEntriesBy', 'sortEntriesBy'] - -# List: must map from month number to month name. -MONTHS = [None, "January", "February", "March", "April", "May", "June", - "July", "August", "September", "October", "November", "December"] - - +from entry import BibTeXEntry +__all__ = ['ParseError', 'BibTeX', 'FileIter', 'Parser', 'parseFile'] class ParseError(Exception): @@ -34,8 +22,6 @@ class ParseError(Exception): pass - - class BibTeX: """A parsed BibTeX file""" def __init__(self): @@ -95,135 +81,6 @@ class BibTeX: -def splitEntriesBy(entries, field): - """Take a list of BibTeX entries and the name of a bibtex field; return - a map from vield value to list of entry.""" - result = {} - for ent in entries: - key = ent.get(field) - if field in config.MULTI_VAL_FIELDS: - key = [k.strip() for k in key.split(',')] - else: - key = [key] - for k in key: - try: - result[k].append(ent) - except: - result[k] = [ent] - return result - -def splitSortedEntriesBy(entries, field): - """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'. - Return a list of (field-value, entry-list) tuples, in the order - given in 'entries'.""" - result = [] - curVal = "alskjdsakldj" - curList = [] - for ent in entries: - key = ent.get(field) - if key == curVal: - curList.append(ent) - else: - curVal = key - curList = [ent] - result.append((curVal, curList)) - return result - -def sortEntriesBy(entries, field, default): - """Take inputs as in splitEntriesBy, and return a list of entries sorted - by the value of 'field'. Entries without 'field' are sorted as if their - value were 'default'. - """ - tmp = [] - i = 0 - for ent in entries: - i += 1 - v = ent.get(field, default) - if v.startswith("<span class='bad'>"): - v = default - if field in config.MULTI_VAL_FIELDS: - for v_j in v.split(','): - ent_j = copy.deepcopy(ent) - ent_j.__setitem__(field, v_j.strip()) - tmp.append((txtize(v_j.strip()), i, ent_j)) - else: tmp.append((txtize(v), i, ent)) - tmp.sort() - return [ t[2] for t in tmp ] - -def splitEntriesByAuthor(entries): - """Take a list of entries, sort them by author names, and return: - a sorted list of (authorname-in-html, bibtex-entry-list) tuples, - a map from authorname-in-html to name-for-url. - Entries with multiple authors appear once per author. - """ - collapsedAuthors = buildAuthorTable(entries) - entries = sortEntriesByDate(entries) - result = {} # Name in sorting order -> entries - htmlResult = {} # name in sorting order -> Full name - url_map = {} # Full name -> Url - for ent in entries: - for a in ent.parsedAuthor: - canonical = collapsedAuthors[a] - url = canonical.getHomepage() - sortkey = canonical.getSortingName() - secname = canonical.getSectionName() - if url: - url_map[secname] = url - - htmlResult[sortkey] = secname - result.setdefault(sortkey, []).append(ent) - sortnames = result.keys() - sortnames.sort() - sections = [ (htmlResult[n], result[n]) for n in sortnames ] - return sections, url_map - -## def sortEntriesByAuthor(entries): -## tmp = [] -## i = 0 -## for ent in entries: -## i += 1 -## authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr)) -## for a in ent.parsedAuthor ] -## tmp.append((tuple(authors), i, ent)) -## tmp.sort() -## return [ t[2] for t in tmp ] - -def sortEntriesByDate(entries): - """Sort a list of entries by their publication date.""" - tmp = [] - i = 0 - for ent in entries: - i += 1 - if (ent.get('month') == "forthcoming" or - ent.get('year') == "forthcoming"): - tmp.append((20000*13, i, ent)) - continue - try: - monthname = ent.get("month") - if monthname is not None: - match = re.match(r"(\w+)--\w+", monthname) - if match: - monthname = match.group(1) - mon = MONTHS.index(monthname) - except ValueError: - print "Unknown month %r in %s"%(ent.get("month"), ent.key) - mon = 0 - - try: - date = int(ent['year'])*13 + mon - except KeyError: - print "ERROR: No year field in %s"%ent.key - date = 10000*13 - except ValueError: - date = 10000*13 - tmp.append((date, i, ent)) - tmp.sort() - return [ t[2] for t in tmp ] - - - - - class FileIter: @@ -238,14 +95,12 @@ class FileIter: assert self.iter self.lineno = 0 self._next = it.next + def next(self): self.lineno += 1 return self._next() - - - class Parser: """Parser class: reads BibTeX from a file and returns a BibTeX object.""" ## Fields @@ -504,6 +359,7 @@ BRACE_CLOSE_RE = re.compile(r'^([^\{\}]*)\}(.*)') BRACE_OPEN_RE = re.compile(r'^([^\{\}]*\{)(.*)') RAW_DATA_RE = re.compile(r'^([^\s\},]+)(.*)') + def parseFile(filename, result=None): """Helper function: parse a single BibTeX file""" f = FileIter(fname=filename) @@ -514,6 +370,7 @@ def parseFile(filename, result=None): e.check() return r + def parseString(string, result=None): """Helper function: parse BibTeX from a string""" f = FileIter(string=string) |
