diff options
| -rw-r--r-- | BibTeX.py | 153 | ||||
| -rw-r--r-- | sortutils.py | 138 | ||||
| -rwxr-xr-x | writeHTML.py | 28 |
3 files changed, 159 insertions, 160 deletions
@@ -9,24 +9,12 @@ import cStringIO import re import sys -import os -import copy import config -from entry import BibTeXEntry, buildAuthorTable -from utils import txtize, url_untranslate, smartJoin - -__all__ = ['ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize', - 'ParsedAuthor', 'FileIter', 'Parser', 'parseFile', - 'splitEntriesBy', 'sortEntriesBy'] - -# List: must map from month number to month name. -MONTHS = [None, "January", "February", "March", "April", "May", "June", - "July", "August", "September", "October", "November", "December"] - - +from entry import BibTeXEntry +__all__ = ['ParseError', 'BibTeX', 'FileIter', 'Parser', 'parseFile'] class ParseError(Exception): @@ -34,8 +22,6 @@ class ParseError(Exception): pass - - class BibTeX: """A parsed BibTeX file""" def __init__(self): @@ -95,135 +81,6 @@ class BibTeX: -def splitEntriesBy(entries, field): - """Take a list of BibTeX entries and the name of a bibtex field; return - a map from vield value to list of entry.""" - result = {} - for ent in entries: - key = ent.get(field) - if field in config.MULTI_VAL_FIELDS: - key = [k.strip() for k in key.split(',')] - else: - key = [key] - for k in key: - try: - result[k].append(ent) - except: - result[k] = [ent] - return result - -def splitSortedEntriesBy(entries, field): - """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'. - Return a list of (field-value, entry-list) tuples, in the order - given in 'entries'.""" - result = [] - curVal = "alskjdsakldj" - curList = [] - for ent in entries: - key = ent.get(field) - if key == curVal: - curList.append(ent) - else: - curVal = key - curList = [ent] - result.append((curVal, curList)) - return result - -def sortEntriesBy(entries, field, default): - """Take inputs as in splitEntriesBy, and return a list of entries sorted - by the value of 'field'. Entries without 'field' are sorted as if their - value were 'default'. - """ - tmp = [] - i = 0 - for ent in entries: - i += 1 - v = ent.get(field, default) - if v.startswith("<span class='bad'>"): - v = default - if field in config.MULTI_VAL_FIELDS: - for v_j in v.split(','): - ent_j = copy.deepcopy(ent) - ent_j.__setitem__(field, v_j.strip()) - tmp.append((txtize(v_j.strip()), i, ent_j)) - else: tmp.append((txtize(v), i, ent)) - tmp.sort() - return [ t[2] for t in tmp ] - -def splitEntriesByAuthor(entries): - """Take a list of entries, sort them by author names, and return: - a sorted list of (authorname-in-html, bibtex-entry-list) tuples, - a map from authorname-in-html to name-for-url. - Entries with multiple authors appear once per author. - """ - collapsedAuthors = buildAuthorTable(entries) - entries = sortEntriesByDate(entries) - result = {} # Name in sorting order -> entries - htmlResult = {} # name in sorting order -> Full name - url_map = {} # Full name -> Url - for ent in entries: - for a in ent.parsedAuthor: - canonical = collapsedAuthors[a] - url = canonical.getHomepage() - sortkey = canonical.getSortingName() - secname = canonical.getSectionName() - if url: - url_map[secname] = url - - htmlResult[sortkey] = secname - result.setdefault(sortkey, []).append(ent) - sortnames = result.keys() - sortnames.sort() - sections = [ (htmlResult[n], result[n]) for n in sortnames ] - return sections, url_map - -## def sortEntriesByAuthor(entries): -## tmp = [] -## i = 0 -## for ent in entries: -## i += 1 -## authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr)) -## for a in ent.parsedAuthor ] -## tmp.append((tuple(authors), i, ent)) -## tmp.sort() -## return [ t[2] for t in tmp ] - -def sortEntriesByDate(entries): - """Sort a list of entries by their publication date.""" - tmp = [] - i = 0 - for ent in entries: - i += 1 - if (ent.get('month') == "forthcoming" or - ent.get('year') == "forthcoming"): - tmp.append((20000*13, i, ent)) - continue - try: - monthname = ent.get("month") - if monthname is not None: - match = re.match(r"(\w+)--\w+", monthname) - if match: - monthname = match.group(1) - mon = MONTHS.index(monthname) - except ValueError: - print "Unknown month %r in %s"%(ent.get("month"), ent.key) - mon = 0 - - try: - date = int(ent['year'])*13 + mon - except KeyError: - print "ERROR: No year field in %s"%ent.key - date = 10000*13 - except ValueError: - date = 10000*13 - tmp.append((date, i, ent)) - tmp.sort() - return [ t[2] for t in tmp ] - - - - - class FileIter: @@ -238,14 +95,12 @@ class FileIter: assert self.iter self.lineno = 0 self._next = it.next + def next(self): self.lineno += 1 return self._next() - - - class Parser: """Parser class: reads BibTeX from a file and returns a BibTeX object.""" ## Fields @@ -504,6 +359,7 @@ BRACE_CLOSE_RE = re.compile(r'^([^\{\}]*)\}(.*)') BRACE_OPEN_RE = re.compile(r'^([^\{\}]*\{)(.*)') RAW_DATA_RE = re.compile(r'^([^\s\},]+)(.*)') + def parseFile(filename, result=None): """Helper function: parse a single BibTeX file""" f = FileIter(fname=filename) @@ -514,6 +370,7 @@ def parseFile(filename, result=None): e.check() return r + def parseString(string, result=None): """Helper function: parse BibTeX from a string""" f = FileIter(string=string) diff --git a/sortutils.py b/sortutils.py new file mode 100644 index 0000000..419fe03 --- /dev/null +++ b/sortutils.py @@ -0,0 +1,138 @@ +import config +import copy +from utils import txtize +from entry import buildAuthorTable +import re + +# List: must map from month number to month name. +MONTHS = [None, "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"] + + +def splitEntriesBy(entries, field): + """Take a list of BibTeX entries and the name of a bibtex field; return + a map from vield value to list of entry.""" + result = {} + for ent in entries: + key = ent.get(field) + if field in config.MULTI_VAL_FIELDS: + key = [k.strip() for k in key.split(',')] + else: + key = [key] + for k in key: + try: + result[k].append(ent) + except: + result[k] = [ent] + return result + + +def splitSortedEntriesBy(entries, field): + """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'. + Return a list of (field-value, entry-list) tuples, in the order + given in 'entries'.""" + result = [] + curVal = "alskjdsakldj" + curList = [] + for ent in entries: + key = ent.get(field) + if key == curVal: + curList.append(ent) + else: + curVal = key + curList = [ent] + result.append((curVal, curList)) + return result + + +def sortEntriesBy(entries, field, default): + """Take inputs as in splitEntriesBy, and return a list of entries sorted + by the value of 'field'. Entries without 'field' are sorted as if their + value were 'default'. + """ + tmp = [] + i = 0 + for ent in entries: + i += 1 + v = ent.get(field, default) + if v.startswith("<span class='bad'>"): + v = default + if field in config.MULTI_VAL_FIELDS: + for v_j in v.split(','): + ent_j = copy.deepcopy(ent) + ent_j.__setitem__(field, v_j.strip()) + tmp.append((txtize(v_j.strip()), i, ent_j)) + else: tmp.append((txtize(v), i, ent)) + tmp.sort() + return [t[2] for t in tmp] + + +def splitEntriesByAuthor(entries): + """Take a list of entries, sort them by author names, and return: + a sorted list of (authorname-in-html, bibtex-entry-list) tuples, + a map from authorname-in-html to name-for-url. + Entries with multiple authors appear once per author. + """ + collapsedAuthors = buildAuthorTable(entries) + entries = sortEntriesByDate(entries) + result = {} # Name in sorting order -> entries + htmlResult = {} # name in sorting order -> Full name + url_map = {} # Full name -> Url + for ent in entries: + for a in ent.parsedAuthor: + canonical = collapsedAuthors[a] + url = canonical.getHomepage() + sortkey = canonical.getSortingName() + secname = canonical.getSectionName() + if url: + url_map[secname] = url + + htmlResult[sortkey] = secname + result.setdefault(sortkey, []).append(ent) + sortnames = result.keys() + sortnames.sort() + sections = [ (htmlResult[n], result[n]) for n in sortnames ] + return sections, url_map + +## def sortEntriesByAuthor(entries): +## tmp = [] +## i = 0 +## for ent in entries: +## i += 1 +## authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr)) +## for a in ent.parsedAuthor ] +## tmp.append((tuple(authors), i, ent)) +## tmp.sort() +## return [ t[2] for t in tmp ] + +def sortEntriesByDate(entries): + """Sort a list of entries by their publication date.""" + tmp = [] + i = 0 + for ent in entries: + i += 1 + if (ent.get('month') == "forthcoming" or + ent.get('year') == "forthcoming"): + tmp.append((20000*13, i, ent)) + continue + try: + monthname = ent.get("month") + if monthname is not None: + match = re.match(r"(\w+)--\w+", monthname) + if match: + monthname = match.group(1) + mon = MONTHS.index(monthname) + except ValueError: + print "Unknown month %r in %s"%(ent.get("month"), ent.key) + mon = 0 + + try: + date = int(ent['year'])*13 + mon + except KeyError: + print "ERROR: No year field in %s"%ent.key + date = 10000*13 + except ValueError: + date = 10000*13 + tmp.append((date, i, ent)) + tmp.sort() + return [ t[2] for t in tmp ] diff --git a/writeHTML.py b/writeHTML.py index 2a3d455..d4e11a0 100755 --- a/writeHTML.py +++ b/writeHTML.py @@ -12,6 +12,10 @@ assert sys.version_info[:3] >= (2,2,0) os.umask(022) import BibTeX +from sortutils import sortEntriesBy, splitSortedEntriesBy, sortEntriesByDate,\ + splitEntriesByAuthor +from utils import smartJoin, url_untranslate +from entry import buildAuthorTable import config def getTemplate(name): @@ -40,10 +44,10 @@ def writeBody(f, sections, section_urls, cache_path, base_url): sDisp = sDisp.replace(" ", " ") if u: print >>f, ('<li><h3><a name="%s"></a><a href="%s">%s</a></h3>'%( - (BibTeX.url_untranslate(s), u, sDisp))) + (url_untranslate(s), u, sDisp))) else: print >>f, ('<li><h3><a name="%s">%s</a></h3>'%( - BibTeX.url_untranslate(s),sDisp)) + url_untranslate(s),sDisp)) print >>f, "<ul class='expand'>" for e in entries: print >>f, e.to_html(cache_path=cache_path, base_url=base_url) @@ -64,7 +68,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices, hts = re.sub(r'\s+', ' ', s.strip()) hts = s.replace(" ", " ") secStr.append("<p class='l2'><a href='#%s'>%s</a></p>\n"% - ((BibTeX.url_untranslate(s),hts))) + ((url_untranslate(s),hts))) secStr = "".join(secStr) # @@ -78,7 +82,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices, if t == tag: tagListStr.append(name) else: - url = BibTeX.smartJoin(root, config.TAG_DIRECTORIES[t], "date.html") + url = smartJoin(root, config.TAG_DIRECTORIES[t], "date.html") tagListStr.append("<a href='%s'>%s</a>"%(url, name)) tagListStr = " | ".join(tagListStr) @@ -130,7 +134,7 @@ def writePageSet(config, bib, tag): tagdir = config.TAG_DIRECTORIES[tag] outdir = os.path.join(config.OUTPUT_DIR, tagdir) - cache_url_path = BibTeX.smartJoin("../"*pathLength(tagdir), + cache_url_path = smartJoin("../"*pathLength(tagdir), config.CACHE_DIR) if not os.path.exists(outdir): os.makedirs(outdir, 0755) @@ -138,12 +142,12 @@ def writePageSet(config, bib, tag): ## By topic. - entries = BibTeX.sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ") - entries = BibTeX.splitSortedEntriesBy(entries, "www_section") + entries = sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ") + entries = splitSortedEntriesBy(entries, "www_section") if entries[-1][0].startswith("<span class='bad'>"): entries[-1] = ("Miscellaneous", entries[-1][1]) - entries = [ (s, BibTeX.sortEntriesByDate(ents)) + entries = [ (s, sortEntriesByDate(ents)) for s, ents in entries ] @@ -159,8 +163,8 @@ def writePageSet(config, bib, tag): ## By date. - entries = BibTeX.sortEntriesByDate(bib_entries) - entries = BibTeX.splitSortedEntriesBy(entries, 'year') + entries = sortEntriesByDate(bib_entries) + entries = splitSortedEntriesBy(entries, 'year') for idx in -1, -2: try: if entries[idx][0].startswith("<span class='bad'>"): @@ -192,7 +196,7 @@ def writePageSet(config, bib, tag): f.close() ## By author - entries, url_map = BibTeX.splitEntriesByAuthor(bib_entries) + entries, url_map = splitEntriesByAuthor(bib_entries) f = open(os.path.join(outdir,"author.html"), 'w') writeHTML(f, entries, "Authors", "author", @@ -226,7 +230,7 @@ def writePageSet(config, bib, tag): print >>f, ( ("<tr><td class='bibtex'><a name='%s'>%s</a>" "<pre class='bibtex'>%s</pre></td></tr>") - %(BibTeX.url_untranslate(ent.key), ent.key, ent.format(90,8,1))) + %(url_untranslate(ent.key), ent.key, ent.format(90,8,1))) print >>f, footer f.close() |
