aboutsummaryrefslogtreecommitdiffstats
path: root/sortutils.py
diff options
context:
space:
mode:
Diffstat (limited to 'sortutils.py')
-rw-r--r--sortutils.py138
1 files changed, 138 insertions, 0 deletions
diff --git a/sortutils.py b/sortutils.py
new file mode 100644
index 0000000..419fe03
--- /dev/null
+++ b/sortutils.py
@@ -0,0 +1,138 @@
+import config
+import copy
+from utils import txtize
+from entry import buildAuthorTable
+import re
+
+# MONTHS[n] is the English name of month number n (1-12).  Index 0 holds
+# None so that MONTHS.index(name) yields a 1-based month number, and so
+# that a missing month (None) looks up as 0 in sortEntriesByDate.
+MONTHS = [None, "January", "February", "March", "April", "May", "June",
+ "July", "August", "September", "October", "November", "December"]
+
+
+def splitEntriesBy(entries, field):
+    """Group BibTeX entries by the value of one field.
+
+    entries -- iterable of entry objects offering a dict-style get().
+    field   -- name of the BibTeX field to group on.
+
+    Returns a map from field value to the list of entries carrying that
+    value, in input order.  Entries that lack 'field' are filed under
+    None.  If 'field' is listed in config.MULTI_VAL_FIELDS, its value is
+    split on commas and the entry is filed once under every stripped
+    piece.
+    """
+    multi = field in config.MULTI_VAL_FIELDS
+    result = {}
+    for ent in entries:
+        value = ent.get(field)
+        if multi and value is not None:
+            keys = [k.strip() for k in value.split(',')]
+        else:
+            # Single-valued field, or a multi-valued field that is absent:
+            # there is nothing to split, so use the value (or None) as is.
+            keys = [value]
+        for k in keys:
+            result.setdefault(k, []).append(ent)
+    return result
+
+
+def splitSortedEntriesBy(entries, field):
+    """Group runs of consecutive entries sharing the same value of 'field'.
+
+    entries -- sequence of entry objects offering a dict-style get(),
+               expected to be already sorted by 'field'.
+    field   -- name of the BibTeX field to group on.
+
+    Returns a list of (field-value, entry-list) tuples in the order the
+    values are first met in 'entries'.  Entries lacking 'field' are
+    grouped under None.  Only adjacent entries are merged, so unsorted
+    input can yield the same value more than once.
+    """
+    result = []
+    for ent in entries:
+        key = ent.get(field)
+        # Compare against the group currently being built instead of a
+        # magic placeholder string, so no real field value can ever be
+        # mistaken for "no group started yet" and silently dropped.
+        if result and key == result[-1][0]:
+            result[-1][1].append(ent)
+        else:
+            result.append((key, [ent]))
+    return result
+
+
+def sortEntriesBy(entries, field, default):
+    """Return the entries sorted by the text form of one field.
+
+    entries -- iterable of entry objects offering a dict-style get().
+    field   -- name of the BibTeX field to sort on.
+    default -- string used as the value for entries that lack 'field' or
+               whose value starts with the "<span class='bad'>" marker.
+
+    Values are compared after passing through txtize(); entries with
+    equal text keep their input order.  If 'field' is listed in
+    config.MULTI_VAL_FIELDS, the value is split on commas and the result
+    holds one deep copy of the entry per piece, each copy having 'field'
+    set to that single stripped piece.
+    """
+    multi = field in config.MULTI_VAL_FIELDS
+    bad_marker = "<span class='bad'>"
+    tmp = []
+    for i, ent in enumerate(entries):
+        v = ent.get(field, default)
+        if v.startswith(bad_marker):
+            v = default
+        if multi:
+            for piece in v.split(','):
+                piece = piece.strip()
+                ent_j = copy.deepcopy(ent)
+                ent_j[field] = piece
+                tmp.append((txtize(piece), i, ent_j))
+        else:
+            tmp.append((txtize(v), i, ent))
+    # Sort on (text, input position) only.  Copies made from one
+    # multi-valued entry share the same position, so a plain tuple sort
+    # could fall through to comparing the entry objects themselves.
+    tmp.sort(key=lambda t: t[:2])
+    return [t[2] for t in tmp]
+
+
+def splitEntriesByAuthor(entries):
+    """Group entries by author and return (sections, url_map).
+
+    sections is a list of (section-name, entry-list) tuples ordered by
+    each author's sorting name; every entry list is in the order
+    produced by sortEntriesByDate.  url_map maps a section name to the
+    author's homepage, for authors that have one.  An entry with several
+    authors appears once under each of them.
+    """
+    authorTable = buildAuthorTable(entries)
+    bySortName = {}    # sorting name -> entries
+    sectionNames = {}  # sorting name -> section name
+    url_map = {}       # section name -> homepage URL
+    for ent in sortEntriesByDate(entries):
+        for author in ent.parsedAuthor:
+            person = authorTable[author]
+            homepage = person.getHomepage()
+            sortName = person.getSortingName()
+            section = person.getSectionName()
+            if homepage:
+                url_map[section] = homepage
+            sectionNames[sortName] = section
+            bySortName.setdefault(sortName, []).append(ent)
+    sections = [(sectionNames[name], bySortName[name])
+                for name in sorted(bySortName.keys())]
+    return sections, url_map
+
+## def sortEntriesByAuthor(entries):
+## tmp = []
+## i = 0
+## for ent in entries:
+## i += 1
+## authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
+## for a in ent.parsedAuthor ]
+## tmp.append((tuple(authors), i, ent))
+## tmp.sort()
+## return [ t[2] for t in tmp ]
+
+def sortEntriesByDate(entries):
+    """Return the entries ordered by publication date, earliest first.
+
+    The sort key is year*13 + month number, where the month number is
+    the position of the month name in MONTHS (0 when there is no month).
+    For a month range written "A--B" only A is used.  An unknown month
+    name is reported and counted as 0.  Entries with a missing or
+    non-numeric year sort after every dated entry, and entries whose
+    month or year is "forthcoming" sort last of all.  Entries with equal
+    keys keep their input order.
+    """
+    undated = 10000*13
+    forthcoming = 20000*13
+    keyed = []
+    for pos, ent in enumerate(entries):
+        if (ent.get('month') == "forthcoming" or
+                ent.get('year') == "forthcoming"):
+            keyed.append((forthcoming, pos, ent))
+            continue
+
+        try:
+            monthname = ent.get("month")
+            if monthname is not None:
+                span = re.match(r"(\w+)--\w+", monthname)
+                if span:
+                    # Month range: keep only the first month.
+                    monthname = span.group(1)
+            mon = MONTHS.index(monthname)
+        except ValueError:
+            print("Unknown month %r in %s"%(ent.get("month"), ent.key))
+            mon = 0
+
+        try:
+            date = int(ent['year'])*13 + mon
+        except KeyError:
+            print("ERROR: No year field in %s"%ent.key)
+            date = undated
+        except ValueError:
+            date = undated
+        keyed.append((date, pos, ent))
+
+    # 'pos' is unique per entry, so the tuple sort never has to compare
+    # the entry objects themselves.
+    keyed.sort()
+    return [ent for (_date, _pos, ent) in keyed]