# sortutils.py -- grouping and sorting helpers for BibTeX entries.
#
# Provenance: created by commit da7359cd452f2ded9e05e753fb125508343b8587
# ("Split sort utilities", Thibaut Horel, Thu, 4 Feb 2016 20:00:45 -0500).

import config
import copy
from utils import txtize
from entry import buildAuthorTable
import re

# List: must map from month number to month name.
# Index 0 is None on purpose: an entry with no month looks up None and
# therefore gets month number 0.
MONTHS = [None, "January", "February", "March", "April", "May", "June",
          "July", "August", "September", "October", "November", "December"]

# Date sort keys are "year * 13 + month number"; 13 slots cover month
# numbers 0 (no/unknown month) through 12.
_MONTH_SLOTS = 13
# Key for entries whose year is missing or not an integer.
_UNKNOWN_DATE = 10000 * _MONTH_SLOTS
# Key for entries marked "forthcoming"; sorts after every other key above.
_FORTHCOMING_DATE = 20000 * _MONTH_SLOTS

# Matches month ranges such as "June--July"; group 1 is the first month.
_MONTH_RANGE_RE = re.compile(r"(\w+)--\w+")

# Prefix of field values that must be sorted as if they were missing.
# NOTE(review): the literal was empty in the reviewed text (which made the
# check always true); it is restored here to the HTML error marker --
# confirm it matches what the entry-rendering code emits.
_BAD_VALUE_PREFIX = "<span class='bad'>"


def _sort_key(item):
    """Return the (sort value, input position) part of a decorated tuple.

    Sorting on this key never compares the entry objects themselves; ties
    keep their insertion order because list.sort() is stable.
    """
    return item[:2]


def splitEntriesBy(entries, field):
    """Take a list of BibTeX entries and the name of a bibtex field; return
       a map from field value to list of entry.

       If 'field' is listed in config.MULTI_VAL_FIELDS, its value is split
       on commas and the entry is filed under every stripped part.  Entries
       that lack the field are filed under the key None.
    """
    result = {}
    multi_valued = field in config.MULTI_VAL_FIELDS
    for ent in entries:
        value = ent.get(field)
        if multi_valued and value is not None:
            keys = [part.strip() for part in value.split(',')]
        else:
            # Single-valued field, or a missing value that cannot be split.
            keys = [value]
        for k in keys:
            result.setdefault(k, []).append(ent)
    return result


def splitSortedEntriesBy(entries, field):
    """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
       Return a list of (field-value, entry-list) tuples, in the order
       given in 'entries'.

       Only consecutive entries with equal values share a tuple, so unsorted
       input yields repeated field-values.
    """
    result = []
    cur_val = None
    cur_list = None  # None until the first entry opens a group.
    for ent in entries:
        key = ent.get(field)
        if cur_list is not None and key == cur_val:
            cur_list.append(ent)
        else:
            cur_val = key
            cur_list = [ent]
            result.append((cur_val, cur_list))
    return result


def sortEntriesBy(entries, field, default):
    """Take inputs as in splitEntriesBy, and return a list of entries sorted
       by the value of 'field'.  Entries without 'field' are sorted as if
       their value were 'default'; so are entries whose value starts with
       the bad-value marker.

       If 'field' is listed in config.MULTI_VAL_FIELDS, the value is split
       on commas and the result holds one deep copy of the entry per part,
       with 'field' set to that part.  Entries with equal sort values keep
       their input order.
    """
    multi_valued = field in config.MULTI_VAL_FIELDS
    decorated = []
    for pos, ent in enumerate(entries):
        v = ent.get(field, default)
        if v.startswith(_BAD_VALUE_PREFIX):
            v = default
        if multi_valued:
            for part in v.split(','):
                part = part.strip()
                # Copy so each output entry carries a single value and the
                # caller's entry is left untouched.
                ent_copy = copy.deepcopy(ent)
                ent_copy[field] = part
                decorated.append((txtize(part), pos, ent_copy))
        else:
            decorated.append((txtize(v), pos, ent))
    decorated.sort(key=_sort_key)
    return [item[2] for item in decorated]


def splitEntriesByAuthor(entries):
    """Take a list of entries, sort them by date, group them by author, and
       return:
         a list of (section-name, entry-list) tuples ordered by the authors'
           sorting names,
         a map from section-name to homepage URL, for authors that have one.
       Entries with multiple authors appear once per author.
    """
    collapsedAuthors = buildAuthorTable(entries)
    entries = sortEntriesByDate(entries)
    result = {}      # Name in sorting order -> entries
    htmlResult = {}  # Name in sorting order -> section name
    url_map = {}     # Section name -> homepage URL
    for ent in entries:
        for a in ent.parsedAuthor:
            canonical = collapsedAuthors[a]
            url = canonical.getHomepage()
            sortkey = canonical.getSortingName()
            secname = canonical.getSectionName()
            if url:
                url_map[secname] = url

            htmlResult[sortkey] = secname
            result.setdefault(sortkey, []).append(ent)
    # sorted() works whether keys() returns a list or a view.
    sections = [(htmlResult[n], result[n]) for n in sorted(result)]
    return sections, url_map

## def sortEntriesByAuthor(entries):
##     tmp = []
##     i = 0
##     for ent in entries:
##         i += 1
##         authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
##                     for a in ent.parsedAuthor ]
##         tmp.append((tuple(authors), i, ent))
##     tmp.sort()
##     return [ t[2] for t in tmp ]


def sortEntriesByDate(entries):
    """Sort a list of entries by their publication date.

       The sort key is year * 13 + month number.  A month range such as
       "June--July" counts as its first month; a missing month counts as
       month 0; an unrecognized month is reported and counts as month 0.
       Entries with a missing or non-integer year sort after all dated
       entries, and entries whose month or year is "forthcoming" sort last.
       Entries with equal dates keep their input order.
    """
    decorated = []
    for pos, ent in enumerate(entries):
        if (ent.get('month') == "forthcoming" or
                ent.get('year') == "forthcoming"):
            decorated.append((_FORTHCOMING_DATE, pos, ent))
            continue
        try:
            monthname = ent.get("month")
            if monthname is not None:
                match = _MONTH_RANGE_RE.match(monthname)
                if match:
                    monthname = match.group(1)
            # A missing month (None) resolves to index 0 of MONTHS.
            mon = MONTHS.index(monthname)
        except ValueError:
            print("Unknown month %r in %s" % (ent.get("month"), ent.key))
            mon = 0

        try:
            date = int(ent['year']) * _MONTH_SLOTS + mon
        except KeyError:
            print("ERROR: No year field in %s" % ent.key)
            date = _UNKNOWN_DATE
        except ValueError:
            # Non-numeric year: placed with the undated entries, silently.
            date = _UNKNOWN_DATE
        decorated.append((date, pos, ent))
    decorated.sort(key=_sort_key)
    return [item[2] for item in decorated]