aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2016-02-04 20:00:45 -0500
committerThibaut Horel <thibaut.horel@gmail.com>2016-02-04 20:00:45 -0500
commitda7359cd452f2ded9e05e753fb125508343b8587 (patch)
treefab5a9cd876a52bf378ff19441c3224d72e9c1d3
parentc25f5fefbfdcbee15685778dcf25b82849e0d617 (diff)
downloadanonbib-da7359cd452f2ded9e05e753fb125508343b8587.tar.gz
Split sort utilities
-rw-r--r--BibTeX.py153
-rw-r--r--sortutils.py138
-rwxr-xr-xwriteHTML.py28
3 files changed, 159 insertions, 160 deletions
diff --git a/BibTeX.py b/BibTeX.py
index 750ca43..85228a1 100644
--- a/BibTeX.py
+++ b/BibTeX.py
@@ -9,24 +9,12 @@
import cStringIO
import re
import sys
-import os
-import copy
import config
-from entry import BibTeXEntry, buildAuthorTable
-from utils import txtize, url_untranslate, smartJoin
-
-__all__ = ['ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize',
- 'ParsedAuthor', 'FileIter', 'Parser', 'parseFile',
- 'splitEntriesBy', 'sortEntriesBy']
-
-# List: must map from month number to month name.
-MONTHS = [None, "January", "February", "March", "April", "May", "June",
- "July", "August", "September", "October", "November", "December"]
-
-
+from entry import BibTeXEntry
+__all__ = ['ParseError', 'BibTeX', 'FileIter', 'Parser', 'parseFile']
class ParseError(Exception):
@@ -34,8 +22,6 @@ class ParseError(Exception):
pass
-
-
class BibTeX:
"""A parsed BibTeX file"""
def __init__(self):
@@ -95,135 +81,6 @@ class BibTeX:
-def splitEntriesBy(entries, field):
- """Take a list of BibTeX entries and the name of a bibtex field; return
- a map from vield value to list of entry."""
- result = {}
- for ent in entries:
- key = ent.get(field)
- if field in config.MULTI_VAL_FIELDS:
- key = [k.strip() for k in key.split(',')]
- else:
- key = [key]
- for k in key:
- try:
- result[k].append(ent)
- except:
- result[k] = [ent]
- return result
-
-def splitSortedEntriesBy(entries, field):
- """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
- Return a list of (field-value, entry-list) tuples, in the order
- given in 'entries'."""
- result = []
- curVal = "alskjdsakldj"
- curList = []
- for ent in entries:
- key = ent.get(field)
- if key == curVal:
- curList.append(ent)
- else:
- curVal = key
- curList = [ent]
- result.append((curVal, curList))
- return result
-
-def sortEntriesBy(entries, field, default):
- """Take inputs as in splitEntriesBy, and return a list of entries sorted
- by the value of 'field'. Entries without 'field' are sorted as if their
- value were 'default'.
- """
- tmp = []
- i = 0
- for ent in entries:
- i += 1
- v = ent.get(field, default)
- if v.startswith("<span class='bad'>"):
- v = default
- if field in config.MULTI_VAL_FIELDS:
- for v_j in v.split(','):
- ent_j = copy.deepcopy(ent)
- ent_j.__setitem__(field, v_j.strip())
- tmp.append((txtize(v_j.strip()), i, ent_j))
- else: tmp.append((txtize(v), i, ent))
- tmp.sort()
- return [ t[2] for t in tmp ]
-
-def splitEntriesByAuthor(entries):
- """Take a list of entries, sort them by author names, and return:
- a sorted list of (authorname-in-html, bibtex-entry-list) tuples,
- a map from authorname-in-html to name-for-url.
- Entries with multiple authors appear once per author.
- """
- collapsedAuthors = buildAuthorTable(entries)
- entries = sortEntriesByDate(entries)
- result = {} # Name in sorting order -> entries
- htmlResult = {} # name in sorting order -> Full name
- url_map = {} # Full name -> Url
- for ent in entries:
- for a in ent.parsedAuthor:
- canonical = collapsedAuthors[a]
- url = canonical.getHomepage()
- sortkey = canonical.getSortingName()
- secname = canonical.getSectionName()
- if url:
- url_map[secname] = url
-
- htmlResult[sortkey] = secname
- result.setdefault(sortkey, []).append(ent)
- sortnames = result.keys()
- sortnames.sort()
- sections = [ (htmlResult[n], result[n]) for n in sortnames ]
- return sections, url_map
-
-## def sortEntriesByAuthor(entries):
-## tmp = []
-## i = 0
-## for ent in entries:
-## i += 1
-## authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
-## for a in ent.parsedAuthor ]
-## tmp.append((tuple(authors), i, ent))
-## tmp.sort()
-## return [ t[2] for t in tmp ]
-
-def sortEntriesByDate(entries):
- """Sort a list of entries by their publication date."""
- tmp = []
- i = 0
- for ent in entries:
- i += 1
- if (ent.get('month') == "forthcoming" or
- ent.get('year') == "forthcoming"):
- tmp.append((20000*13, i, ent))
- continue
- try:
- monthname = ent.get("month")
- if monthname is not None:
- match = re.match(r"(\w+)--\w+", monthname)
- if match:
- monthname = match.group(1)
- mon = MONTHS.index(monthname)
- except ValueError:
- print "Unknown month %r in %s"%(ent.get("month"), ent.key)
- mon = 0
-
- try:
- date = int(ent['year'])*13 + mon
- except KeyError:
- print "ERROR: No year field in %s"%ent.key
- date = 10000*13
- except ValueError:
- date = 10000*13
- tmp.append((date, i, ent))
- tmp.sort()
- return [ t[2] for t in tmp ]
-
-
-
-
-
class FileIter:
@@ -238,14 +95,12 @@ class FileIter:
assert self.iter
self.lineno = 0
self._next = it.next
+
def next(self):
self.lineno += 1
return self._next()
-
-
-
class Parser:
"""Parser class: reads BibTeX from a file and returns a BibTeX object."""
## Fields
@@ -504,6 +359,7 @@ BRACE_CLOSE_RE = re.compile(r'^([^\{\}]*)\}(.*)')
BRACE_OPEN_RE = re.compile(r'^([^\{\}]*\{)(.*)')
RAW_DATA_RE = re.compile(r'^([^\s\},]+)(.*)')
+
def parseFile(filename, result=None):
"""Helper function: parse a single BibTeX file"""
f = FileIter(fname=filename)
@@ -514,6 +370,7 @@ def parseFile(filename, result=None):
e.check()
return r
+
def parseString(string, result=None):
"""Helper function: parse BibTeX from a string"""
f = FileIter(string=string)
diff --git a/sortutils.py b/sortutils.py
new file mode 100644
index 0000000..419fe03
--- /dev/null
+++ b/sortutils.py
@@ -0,0 +1,138 @@
+import config
+import copy
+from utils import txtize
+from entry import buildAuthorTable
+import re
+
+# List: must map from month number to month name.
+MONTHS = [None, "January", "February", "March", "April", "May", "June",
+ "July", "August", "September", "October", "November", "December"]
+
+
def splitEntriesBy(entries, field):
    """Group BibTeX entries by the value of one of their fields.

    Args:
        entries: iterable of entry objects exposing ``get(field)``.
        field: name of the BibTeX field to group on.

    Returns:
        A dict mapping each field value to the list of entries carrying
        that value, in input order.  When ``field`` is listed in
        ``config.MULTI_VAL_FIELDS`` the value is split on commas and the
        entry is filed once under every stripped component.  An entry
        whose ``get(field)`` yields ``None`` is filed under the key
        ``None``.
    """
    multi_valued = field in config.MULTI_VAL_FIELDS
    result = {}
    for ent in entries:
        value = ent.get(field)
        if multi_valued and value is not None:
            keys = [part.strip() for part in value.split(',')]
        else:
            # A missing multi-valued field used to blow up on None.split();
            # file it under None exactly like a missing single-valued field.
            keys = [value]
        for k in keys:
            # setdefault replaces a bare "except:" that masked every error
            # raised while appending, not just the expected KeyError.
            result.setdefault(k, []).append(ent)
    return result
+
+
def splitSortedEntriesBy(entries, field):
    """Split a pre-sorted sequence of entries into runs sharing a field value.

    Args:
        entries: iterable of entry objects exposing ``get(field)``; it is
            expected to be sorted by ``field`` already, since only
            *adjacent* entries with equal values are merged.
        field: name of the BibTeX field to split on.

    Returns:
        A list of ``(field_value, entry_list)`` tuples in the order the
        values are first met in ``entries``.  An empty input gives ``[]``.
    """
    result = []
    previous = None
    run = None  # list of the run being built; None until the first entry
    for ent in entries:
        value = ent.get(field)
        # "run is None" marks the very first entry.  The former magic
        # sentinel string could collide with a real field value, in which
        # case the first run was silently dropped from the result.
        if run is not None and value == previous:
            run.append(ent)
        else:
            previous = value
            run = [ent]
            result.append((value, run))
    return result
+
+
def sortEntriesBy(entries, field, default):
    """Return the entries sorted by the text form of one of their fields.

    Args:
        entries: iterable of entry objects exposing ``get(field, default)``.
        field: name of the BibTeX field to sort on.
        default: value used for entries lacking ``field``, and for entries
            whose value starts with the ``<span class='bad'>`` marker.

    Returns:
        A new list ordered by ``txtize(value)``; entries with equal keys
        keep their input order.  When ``field`` is listed in
        ``config.MULTI_VAL_FIELDS`` the value is split on commas and the
        result holds one deep copy of the entry per component, each copy
        having ``field`` set to that single stripped component.
    """
    bad_prefix = "<span class='bad'>"
    multi_valued = field in config.MULTI_VAL_FIELDS
    keyed = []
    for position, ent in enumerate(entries):
        value = ent.get(field, default)
        if value.startswith(bad_prefix):
            value = default
        if multi_valued:
            for part in value.split(','):
                part = part.strip()
                clone = copy.deepcopy(ent)
                clone[field] = part
                keyed.append((txtize(part), position, clone))
        else:
            keyed.append((txtize(value), position, ent))
    # Sort on (text, position) only.  Comparing whole tuples fell through
    # to comparing the entry objects themselves whenever one entry produced
    # the same component twice (e.g. "a, a").
    keyed.sort(key=lambda item: item[:2])
    return [item[2] for item in keyed]
+
+
def splitEntriesByAuthor(entries):
    """Group entries by author, one section per canonical author.

    The entries are first ordered with ``sortEntriesByDate``; every entry
    is then added to the section of each author in its ``parsedAuthor``
    list, so a paper with several authors shows up once per author.

    Returns a pair ``(sections, url_map)``:
        sections: list of ``(section_name, entry_list)`` tuples ordered by
            the authors' sorting names.
        url_map: dict from section name to the author's homepage, filled
            in only for authors whose ``getHomepage()`` is truthy.
    """
    canonical_for = buildAuthorTable(entries)
    by_sortkey = {}      # sorting name -> entries (in date order)
    section_names = {}   # sorting name -> section name
    url_map = {}         # section name -> homepage URL
    for ent in sortEntriesByDate(entries):
        for author in ent.parsedAuthor:
            person = canonical_for[author]
            homepage = person.getHomepage()
            sortkey = person.getSortingName()
            section = person.getSectionName()
            if homepage:
                url_map[section] = homepage
            section_names[sortkey] = section
            by_sortkey.setdefault(sortkey, []).append(ent)
    sections = [(section_names[name], by_sortkey[name])
                for name in sorted(by_sortkey)]
    return sections, url_map
+
+## def sortEntriesByAuthor(entries):
+## tmp = []
+## i = 0
+## for ent in entries:
+## i += 1
+## authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
+## for a in ent.parsedAuthor ]
+## tmp.append((tuple(authors), i, ent))
+## tmp.sort()
+## return [ t[2] for t in tmp ]
+
def sortEntriesByDate(entries):
    """Return the entries ordered by publication date, earliest first.

    Each entry gets the numeric key ``year * 13 + month_index`` where
    ``month_index`` is the position of its month name in ``MONTHS``.  A
    range such as ``May--June`` counts as its first month, a missing month
    counts as 0, and an unrecognised month is reported and counts as 0.
    Entries whose month or year is ``"forthcoming"`` get the key
    ``20000 * 13``; entries with a missing or non-integer year get
    ``10000 * 13`` (a missing year is also reported).  Entries with equal
    keys keep their input order.
    """
    forthcoming_key = 20000 * 13
    unknown_year_key = 10000 * 13
    month_range = re.compile(r"(\w+)--\w+")

    keyed = []
    for position, ent in enumerate(entries):
        if (ent.get('month') == "forthcoming" or
                ent.get('year') == "forthcoming"):
            keyed.append((forthcoming_key, position, ent))
            continue

        monthname = ent.get("month")
        if monthname is not None:
            span = month_range.match(monthname)
            if span:
                monthname = span.group(1)
        try:
            # MONTHS[0] is None, so a missing month resolves to index 0.
            mon = MONTHS.index(monthname)
        except ValueError:
            print("Unknown month %r in %s" % (ent.get("month"), ent.key))
            mon = 0

        try:
            date = int(ent['year']) * 13 + mon
        except KeyError:
            print("ERROR: No year field in %s" % ent.key)
            date = unknown_year_key
        except ValueError:
            date = unknown_year_key
        keyed.append((date, position, ent))

    # position is unique, so tuple comparison never reaches the entry.
    keyed.sort()
    return [item[2] for item in keyed]
diff --git a/writeHTML.py b/writeHTML.py
index 2a3d455..d4e11a0 100755
--- a/writeHTML.py
+++ b/writeHTML.py
@@ -12,6 +12,10 @@ assert sys.version_info[:3] >= (2,2,0)
os.umask(022)
import BibTeX
+from sortutils import sortEntriesBy, splitSortedEntriesBy, sortEntriesByDate,\
+ splitEntriesByAuthor
+from utils import smartJoin, url_untranslate
+from entry import buildAuthorTable
import config
def getTemplate(name):
@@ -40,10 +44,10 @@ def writeBody(f, sections, section_urls, cache_path, base_url):
sDisp = sDisp.replace(" ", "&nbsp;")
if u:
print >>f, ('<li><h3><a name="%s"></a><a href="%s">%s</a></h3>'%(
- (BibTeX.url_untranslate(s), u, sDisp)))
+ (url_untranslate(s), u, sDisp)))
else:
print >>f, ('<li><h3><a name="%s">%s</a></h3>'%(
- BibTeX.url_untranslate(s),sDisp))
+ url_untranslate(s),sDisp))
print >>f, "<ul class='expand'>"
for e in entries:
print >>f, e.to_html(cache_path=cache_path, base_url=base_url)
@@ -64,7 +68,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices,
hts = re.sub(r'\s+', ' ', s.strip())
hts = s.replace(" ", "&nbsp;")
secStr.append("<p class='l2'><a href='#%s'>%s</a></p>\n"%
- ((BibTeX.url_untranslate(s),hts)))
+ ((url_untranslate(s),hts)))
secStr = "".join(secStr)
#
@@ -78,7 +82,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices,
if t == tag:
tagListStr.append(name)
else:
- url = BibTeX.smartJoin(root, config.TAG_DIRECTORIES[t], "date.html")
+ url = smartJoin(root, config.TAG_DIRECTORIES[t], "date.html")
tagListStr.append("<a href='%s'>%s</a>"%(url, name))
tagListStr = "&nbsp;|&nbsp;".join(tagListStr)
@@ -130,7 +134,7 @@ def writePageSet(config, bib, tag):
tagdir = config.TAG_DIRECTORIES[tag]
outdir = os.path.join(config.OUTPUT_DIR, tagdir)
- cache_url_path = BibTeX.smartJoin("../"*pathLength(tagdir),
+ cache_url_path = smartJoin("../"*pathLength(tagdir),
config.CACHE_DIR)
if not os.path.exists(outdir):
os.makedirs(outdir, 0755)
@@ -138,12 +142,12 @@ def writePageSet(config, bib, tag):
## By topic.
- entries = BibTeX.sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ")
- entries = BibTeX.splitSortedEntriesBy(entries, "www_section")
+ entries = sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ")
+ entries = splitSortedEntriesBy(entries, "www_section")
if entries[-1][0].startswith("<span class='bad'>"):
entries[-1] = ("Miscellaneous", entries[-1][1])
- entries = [ (s, BibTeX.sortEntriesByDate(ents))
+ entries = [ (s, sortEntriesByDate(ents))
for s, ents in entries
]
@@ -159,8 +163,8 @@ def writePageSet(config, bib, tag):
## By date.
- entries = BibTeX.sortEntriesByDate(bib_entries)
- entries = BibTeX.splitSortedEntriesBy(entries, 'year')
+ entries = sortEntriesByDate(bib_entries)
+ entries = splitSortedEntriesBy(entries, 'year')
for idx in -1, -2:
try:
if entries[idx][0].startswith("<span class='bad'>"):
@@ -192,7 +196,7 @@ def writePageSet(config, bib, tag):
f.close()
## By author
- entries, url_map = BibTeX.splitEntriesByAuthor(bib_entries)
+ entries, url_map = splitEntriesByAuthor(bib_entries)
f = open(os.path.join(outdir,"author.html"), 'w')
writeHTML(f, entries, "Authors", "author",
@@ -226,7 +230,7 @@ def writePageSet(config, bib, tag):
print >>f, (
("<tr><td class='bibtex'><a name='%s'>%s</a>"
"<pre class='bibtex'>%s</pre></td></tr>")
- %(BibTeX.url_untranslate(ent.key), ent.key, ent.format(90,8,1)))
+ %(url_untranslate(ent.key), ent.key, ent.format(90,8,1)))
print >>f, footer
f.close()