Split sort utilities

author: Thibaut Horel <thibaut.horel@gmail.com> 2016-02-04 20:00:45 -0500
committer: Thibaut Horel <thibaut.horel@gmail.com> 2016-02-04 20:00:45 -0500
commit: da7359cd452f2ded9e05e753fb125508343b8587 (patch)
tree: fab5a9cd876a52bf378ff19441c3224d72e9c1d3
parent: c25f5fefbfdcbee15685778dcf25b82849e0d617 (diff)
download: anonbib-da7359cd452f2ded9e05e753fb125508343b8587.tar.gz
3 files changed, 159 insertions, 160 deletions
diff --git a/BibTeX.py b/BibTeX.py
index 750ca43..85228a1 100644
--- a/BibTeX.py
+++ b/BibTeX.py
@@ -9,24 +9,12 @@
 import cStringIO
 import re
 import sys
-import os
-import copy
 
 import config
 
-from entry import BibTeXEntry, buildAuthorTable
-from utils import txtize, url_untranslate, smartJoin
-
-__all__ = ['ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize',
-           'ParsedAuthor', 'FileIter', 'Parser', 'parseFile',
-           'splitEntriesBy', 'sortEntriesBy']
-
-# List: must map from month number to month name.
-MONTHS = [None, "January", "February", "March", "April", "May", "June",
-          "July", "August", "September", "October", "November", "December"]
-
-
+from entry import BibTeXEntry
 
+__all__ = ['ParseError', 'BibTeX', 'FileIter', 'Parser', 'parseFile']
 
 
 class ParseError(Exception):
@@ -34,8 +22,6 @@ class ParseError(Exception):
     pass
 
 
-
-
 class BibTeX:
     """A parsed BibTeX file"""
     def __init__(self):
@@ -95,135 +81,6 @@ class BibTeX:
 
 
 
-def splitEntriesBy(entries, field):
-    """Take a list of BibTeX entries and the name of a bibtex field; return
-       a map from vield value to list of entry."""
-    result = {}
-    for ent in entries:
-        key = ent.get(field)
-        if field in config.MULTI_VAL_FIELDS:
-            key = [k.strip() for k in key.split(',')]
-        else:
-            key = [key]
-        for k in key:
-            try:
-                result[k].append(ent)
-            except:
-                result[k] = [ent]
-    return result
-
-def splitSortedEntriesBy(entries, field):
-    """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
-       Return a list of (field-value, entry-list) tuples, in the order
-       given in 'entries'."""
-    result = []
-    curVal = "alskjdsakldj"
-    curList = []
-    for ent in entries:
-        key = ent.get(field)
-        if key == curVal:
-            curList.append(ent)
-        else:
-            curVal = key
-            curList = [ent]
-            result.append((curVal, curList))
-    return result
-
-def sortEntriesBy(entries, field, default):
-    """Take inputs as in splitEntriesBy, and return a list of entries sorted
-       by the value of 'field'. Entries without 'field' are sorted as if their
-       value were 'default'.
-       """
-    tmp = []
-    i = 0
-    for ent in entries:
-        i += 1
-        v = ent.get(field, default)
-        if v.startswith("<span class='bad'>"):
-            v = default
-        if field in config.MULTI_VAL_FIELDS:
-            for v_j in v.split(','):
-                ent_j = copy.deepcopy(ent)
-                ent_j.__setitem__(field, v_j.strip())
-                tmp.append((txtize(v_j.strip()), i, ent_j))
-        else: tmp.append((txtize(v), i, ent))
-    tmp.sort()
-    return [ t[2] for t in tmp ]
-
-def splitEntriesByAuthor(entries):
-    """Take a list of entries, sort them by author names, and return:
-         a sorted list of (authorname-in-html, bibtex-entry-list) tuples,
-         a map from authorname-in-html to name-for-url.
-       Entries with multiple authors appear once per author.
-    """
-    collapsedAuthors = buildAuthorTable(entries)
-    entries = sortEntriesByDate(entries)
-    result = {} # Name in sorting order -> entries
-    htmlResult = {} # name in sorting order -> Full name
-    url_map = {} # Full name -> Url
-    for ent in entries:
-        for a in ent.parsedAuthor:
-            canonical = collapsedAuthors[a]
-            url = canonical.getHomepage()
-            sortkey = canonical.getSortingName()
-            secname = canonical.getSectionName()
-            if url:
-                url_map[secname] = url
-
-            htmlResult[sortkey] = secname
-            result.setdefault(sortkey, []).append(ent)
-    sortnames = result.keys()
-    sortnames.sort()
-    sections = [ (htmlResult[n], result[n]) for n in sortnames ]
-    return sections, url_map
-
-## def sortEntriesByAuthor(entries):
-##     tmp = []
-##     i = 0
-##     for ent in entries:
-##         i += 1
-##         authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
-##                     for a in ent.parsedAuthor ]
-##         tmp.append((tuple(authors), i, ent))
-##     tmp.sort()
-##     return [ t[2] for t in tmp ]
-
-def sortEntriesByDate(entries):
-    """Sort a list of entries by their publication date."""
-    tmp = []
-    i = 0
-    for ent in entries:
-        i += 1
-        if (ent.get('month') == "forthcoming" or
-            ent.get('year') == "forthcoming"):
-            tmp.append((20000*13, i, ent))
-            continue
-        try:
-            monthname = ent.get("month")
-            if monthname is not None:
-                match = re.match(r"(\w+)--\w+", monthname)
-                if match:
-                    monthname = match.group(1)
-            mon = MONTHS.index(monthname)
-        except ValueError:
-            print "Unknown month %r in %s"%(ent.get("month"), ent.key)
-            mon = 0
-
-        try:
-            date = int(ent['year'])*13 + mon
-        except KeyError:
-            print "ERROR: No year field in %s"%ent.key
-            date = 10000*13
-        except ValueError:
-            date = 10000*13
-        tmp.append((date, i, ent))
-    tmp.sort()
-    return [ t[2] for t in tmp ]
-
-
-
-
-
 
 
 class FileIter:
@@ -238,14 +95,12 @@ class FileIter:
         assert self.iter
         self.lineno = 0
         self._next = it.next
+
     def next(self):
         self.lineno += 1
         return self._next()
 
 
-
-
-
 class Parser:
     """Parser class: reads BibTeX from a file and returns a BibTeX object."""
     ## Fields
@@ -504,6 +359,7 @@ BRACE_CLOSE_RE = re.compile(r'^([^\{\}]*)\}(.*)')
 BRACE_OPEN_RE = re.compile(r'^([^\{\}]*\{)(.*)')
 RAW_DATA_RE = re.compile(r'^([^\s\},]+)(.*)')
 
+
 def parseFile(filename, result=None):
     """Helper function: parse a single BibTeX file"""
     f = FileIter(fname=filename)
@@ -514,6 +370,7 @@ def parseFile(filename, result=None):
         e.check()
     return r
 
+
 def parseString(string, result=None):
     """Helper function: parse BibTeX from a string"""
     f = FileIter(string=string)
diff --git a/sortutils.py b/sortutils.py
new file mode 100644
index 0000000..419fe03
--- /dev/null
+++ b/sortutils.py
@@ -0,0 +1,138 @@
+import config
+import copy
+from utils import txtize
+from entry import buildAuthorTable
+import re
+
+# List: must map from month number to month name.
+MONTHS = [None, "January", "February", "March", "April", "May", "June",
+          "July", "August", "September", "October", "November", "December"]
+
+
+def splitEntriesBy(entries, field):
+    """Take a list of BibTeX entries and the name of a bibtex field; return
+       a map from field value to list of entries."""
+    result = {}
+    for ent in entries:
+        key = ent.get(field)
+        if field in config.MULTI_VAL_FIELDS:
+            key = [k.strip() for k in key.split(',')]
+        else:
+            key = [key]
+        for k in key:
+            try:
+                result[k].append(ent)
+            except:
+                result[k] = [ent]
+    return result
+
+
+def splitSortedEntriesBy(entries, field):
+    """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
+       Return a list of (field-value, entry-list) tuples, in the order
+       given in 'entries'."""
+    result = []
+    curVal = "alskjdsakldj"
+    curList = []
+    for ent in entries:
+        key = ent.get(field)
+        if key == curVal:
+            curList.append(ent)
+        else:
+            curVal = key
+            curList = [ent]
+            result.append((curVal, curList))
+    return result
+
+
+def sortEntriesBy(entries, field, default):
+    """Take inputs as in splitEntriesBy, and return a list of entries sorted
+       by the value of 'field'. Entries without 'field' are sorted as if their
+       value were 'default'.
+       """
+    tmp = []
+    i = 0
+    for ent in entries:
+        i += 1
+        v = ent.get(field, default)
+        if v.startswith("<span class='bad'>"):
+            v = default
+        if field in config.MULTI_VAL_FIELDS:
+            for v_j in v.split(','):
+                ent_j = copy.deepcopy(ent)
+                ent_j.__setitem__(field, v_j.strip())
+                tmp.append((txtize(v_j.strip()), i, ent_j))
+        else: tmp.append((txtize(v), i, ent))
+    tmp.sort()
+    return [t[2] for t in tmp]
+
+
+def splitEntriesByAuthor(entries):
+    """Take a list of entries, sort them by author names, and return:
+         a sorted list of (authorname-in-html, bibtex-entry-list) tuples,
+         a map from authorname-in-html to homepage url (if the author has one).
+       Entries with multiple authors appear once per author.
+    """
+    collapsedAuthors = buildAuthorTable(entries)
+    entries = sortEntriesByDate(entries)
+    result = {} # Name in sorting order -> entries
+    htmlResult = {} # name in sorting order -> Full name
+    url_map = {} # Full name -> Url
+    for ent in entries:
+        for a in ent.parsedAuthor:
+            canonical = collapsedAuthors[a]
+            url = canonical.getHomepage()
+            sortkey = canonical.getSortingName()
+            secname = canonical.getSectionName()
+            if url:
+                url_map[secname] = url
+
+            htmlResult[sortkey] = secname
+            result.setdefault(sortkey, []).append(ent)
+    sortnames = result.keys()
+    sortnames.sort()
+    sections = [ (htmlResult[n], result[n]) for n in sortnames ]
+    return sections, url_map
+
+## def sortEntriesByAuthor(entries):
+##     tmp = []
+##     i = 0
+##     for ent in entries:
+##         i += 1
+##         authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
+##                     for a in ent.parsedAuthor ]
+##         tmp.append((tuple(authors), i, ent))
+##     tmp.sort()
+##     return [ t[2] for t in tmp ]
+
+def sortEntriesByDate(entries):
+    """Sort a list of entries by their publication date."""
+    tmp = []
+    i = 0
+    for ent in entries:
+        i += 1
+        if (ent.get('month') == "forthcoming" or
+            ent.get('year') == "forthcoming"):
+            tmp.append((20000*13, i, ent))
+            continue
+        try:
+            monthname = ent.get("month")
+            if monthname is not None:
+                match = re.match(r"(\w+)--\w+", monthname)
+                if match:
+                    monthname = match.group(1)
+            mon = MONTHS.index(monthname)
+        except ValueError:
+            print "Unknown month %r in %s"%(ent.get("month"), ent.key)
+            mon = 0
+
+        try:
+            date = int(ent['year'])*13 + mon
+        except KeyError:
+            print "ERROR: No year field in %s"%ent.key
+            date = 10000*13
+        except ValueError:
+            date = 10000*13
+        tmp.append((date, i, ent))
+    tmp.sort()
+    return [ t[2] for t in tmp ]
diff --git a/writeHTML.py b/writeHTML.py
index 2a3d455..d4e11a0 100755
--- a/writeHTML.py
+++ b/writeHTML.py
@@ -12,6 +12,10 @@ assert sys.version_info[:3] >= (2,2,0)
 os.umask(022)
 
 import BibTeX
+from sortutils import sortEntriesBy, splitSortedEntriesBy, sortEntriesByDate,\
+    splitEntriesByAuthor
+from utils import smartJoin, url_untranslate
+from entry import buildAuthorTable
 import config
 
 def getTemplate(name):
@@ -40,10 +44,10 @@ def writeBody(f, sections, section_urls, cache_path, base_url):
         sDisp = sDisp.replace(" ", "&nbsp;")
         if u:
             print >>f, ('<li><h3><a name="%s"></a><a href="%s">%s</a></h3>'%(
-                (BibTeX.url_untranslate(s), u, sDisp)))
+                (url_untranslate(s), u, sDisp)))
         else:
             print >>f, ('<li><h3><a name="%s">%s</a></h3>'%(
-                BibTeX.url_untranslate(s),sDisp))
+                url_untranslate(s),sDisp))
         print >>f, "<ul class='expand'>"
         for e in entries:
             print >>f, e.to_html(cache_path=cache_path, base_url=base_url)
@@ -64,7 +68,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices,
         hts = re.sub(r'\s+', ' ', s.strip())
         hts = s.replace(" ", "&nbsp;")
         secStr.append("<p class='l2'><a href='#%s'>%s</a></p>\n"%
-                      ((BibTeX.url_untranslate(s),hts)))
+                      ((url_untranslate(s),hts)))
     secStr = "".join(secStr)
 
     #
@@ -78,7 +82,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices,
         if t == tag:
             tagListStr.append(name)
         else:
-            url = BibTeX.smartJoin(root, config.TAG_DIRECTORIES[t], "date.html")
+            url = smartJoin(root, config.TAG_DIRECTORIES[t], "date.html")
             tagListStr.append("<a href='%s'>%s</a>"%(url, name))
     tagListStr = "&nbsp;|&nbsp;".join(tagListStr)
 
@@ -130,7 +134,7 @@ def writePageSet(config, bib, tag):
 
     tagdir = config.TAG_DIRECTORIES[tag]
     outdir = os.path.join(config.OUTPUT_DIR, tagdir)
-    cache_url_path = BibTeX.smartJoin("../"*pathLength(tagdir),
+    cache_url_path = smartJoin("../"*pathLength(tagdir),
                                       config.CACHE_DIR)
     if not os.path.exists(outdir):
         os.makedirs(outdir, 0755)
@@ -138,12 +142,12 @@ def writePageSet(config, bib, tag):
 
     ## By topic.
 
-    entries = BibTeX.sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ")
-    entries = BibTeX.splitSortedEntriesBy(entries, "www_section")
+    entries = sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ")
+    entries = splitSortedEntriesBy(entries, "www_section")
     if entries[-1][0].startswith("<span class='bad'>"):
         entries[-1] = ("Miscellaneous", entries[-1][1])
 
-    entries = [ (s, BibTeX.sortEntriesByDate(ents))
+    entries = [ (s, sortEntriesByDate(ents))
                 for s, ents in entries
                 ]
 
@@ -159,8 +163,8 @@ def writePageSet(config, bib, tag):
 
     ## By date.
 
-    entries = BibTeX.sortEntriesByDate(bib_entries)
-    entries = BibTeX.splitSortedEntriesBy(entries, 'year')
+    entries = sortEntriesByDate(bib_entries)
+    entries = splitSortedEntriesBy(entries, 'year')
     for idx in -1, -2:
         try:
             if entries[idx][0].startswith("<span class='bad'>"):
@@ -192,7 +196,7 @@ def writePageSet(config, bib, tag):
     f.close()
 
     ## By author
-    entries, url_map = BibTeX.splitEntriesByAuthor(bib_entries)
+    entries, url_map = splitEntriesByAuthor(bib_entries)
 
     f = open(os.path.join(outdir,"author.html"), 'w')
     writeHTML(f, entries, "Authors", "author",
@@ -226,7 +230,7 @@ def writePageSet(config, bib, tag):
         print >>f, (
             ("<tr><td class='bibtex'><a name='%s'>%s</a>"
             "<pre class='bibtex'>%s</pre></td></tr>")
-            %(BibTeX.url_untranslate(ent.key), ent.key, ent.format(90,8,1)))
+            %(url_untranslate(ent.key), ent.key, ent.format(90,8,1)))
     print >>f, footer
     f.close()
author	Thibaut Horel <thibaut.horel@gmail.com>	2016-02-04 20:00:45 -0500
committer	Thibaut Horel <thibaut.horel@gmail.com>	2016-02-04 20:00:45 -0500
commit	da7359cd452f2ded9e05e753fb125508343b8587 (patch)
tree	fab5a9cd876a52bf378ff19441c3224d72e9c1d3
parent	c25f5fefbfdcbee15685778dcf25b82849e0d617 (diff)
download	anonbib-da7359cd452f2ded9e05e753fb125508343b8587.tar.gz