aboutsummaryrefslogtreecommitdiffstats
path: root/sortutils.py
blob: d86a29982c1d664b796549868407259f72934f36 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import config
import copy
from utils import txtize
from author import buildAuthorTable
import re

# Month names indexed by calendar month number (1-12).  Slot 0 is None so
# that list positions coincide with month numbers; it must stay a sequence
# whose index is the month number.
MONTHS = [
    None,
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December",
]


def splitEntriesBy(entries, field):
    """Take a list of BibTeX entries and the name of a bibtex field; return
       a map from field value to list of entries.

       If 'field' is listed in config.MULTI_VAL_FIELDS, its value is treated
       as a comma-separated list and the entry is filed once under each
       whitespace-stripped item.  Entries that do not have 'field' at all
       are filed under the key None."""
    multiValued = field in config.MULTI_VAL_FIELDS
    result = {}
    for ent in entries:
        value = ent.get(field)
        if multiValued and value is not None:
            keys = [k.strip() for k in value.split(',')]
        else:
            # Single-valued field, or the field is missing: None cannot be
            # split, so a missing multi-valued field is grouped under None
            # exactly like a missing single-valued one.
            keys = [value]
        for k in keys:
            # setdefault replaces a bare 'except:' that would also have
            # swallowed errors unrelated to a missing key.
            result.setdefault(k, []).append(ent)
    return result


def splitSortedEntriesBy(entries, field):
    """Take inputs as in splitEntriesBy, where 'entries' is sorted by 'field'.
       Return a list of (field-value, entry-list) tuples, in the order
       given in 'entries'.

       Consecutive entries with equal values of 'field' share one tuple;
       entries lacking 'field' are grouped under None.  An empty input
       yields an empty list."""
    result = []
    for ent in entries:
        key = ent.get(field)
        # Compare against the group emitted last rather than against a
        # magic placeholder string, so no real field value can ever be
        # mistaken for "no group started yet".
        if result and key == result[-1][0]:
            result[-1][1].append(ent)
        else:
            result.append((key, [ent]))
    return result


def sortEntriesBy(entries, field, default):
    """Take inputs as in splitEntriesBy, and return a list of entries sorted
       by the value of 'field'. Entries without 'field' are sorted as if their
       value were 'default'; so are entries whose value starts with the
       "<span class='bad'>" marker.

       If 'field' is listed in config.MULTI_VAL_FIELDS, the value is split on
       commas and the result holds one deep copy of the entry per item, with
       the copy's 'field' set to that (whitespace-stripped) item.  The result
       can therefore be longer than 'entries'.

       Entries with equal sort text keep their input order.
       """
    multiValued = field in config.MULTI_VAL_FIELDS
    tmp = []
    for i, ent in enumerate(entries, 1):
        v = ent.get(field, default)
        if v.startswith("<span class='bad'>"):
            v = default
        if multiValued:
            for v_j in v.split(','):
                v_j = v_j.strip()
                # Copy so that each item gets its own entry carrying only
                # that item, leaving the caller's entry unmodified.
                ent_j = copy.deepcopy(ent)
                ent_j[field] = v_j
                tmp.append((txtize(v_j), i, ent_j))
        else:
            tmp.append((txtize(v), i, ent))
    # Sort on (text, input position) only.  A multi-valued field that repeats
    # an item yields tuples with identical text and position; comparing whole
    # tuples would then fall through to comparing the entry objects.  The
    # sort is stable, so such ties keep their insertion order.
    tmp.sort(key=lambda t: t[:2])
    return [t[2] for t in tmp]


def splitEntriesByAuthor(entries):
    """Take a list of entries, group them by author, and return a pair:
         a list of (section-name, bibtex-entry-list) tuples, ordered by the
           authors' sorting names,
         a map from section-name to homepage URL, containing only authors
           for which getHomepage() returned a true value.
       The section name is whatever the canonical author's getSectionName()
       returns (presumably the author name in HTML -- confirm in author.py).
       Entries with multiple authors appear once per author.  Within each
       author's list, entries are in the order given by sortEntriesByDate.
    """
    # Maps each parsed author to its canonical author object.
    collapsedAuthors = buildAuthorTable(entries)
    entries = sortEntriesByDate(entries)
    result = {}      # sorting name -> entries
    htmlResult = {}  # sorting name -> section name
    url_map = {}     # section name -> homepage URL
    for ent in entries:
        for a in ent.parsedAuthor:
            canonical = collapsedAuthors[a]
            url = canonical.getHomepage()
            sortkey = canonical.getSortingName()
            secname = canonical.getSectionName()
            if url:
                url_map[secname] = url

            htmlResult[sortkey] = secname
            result.setdefault(sortkey, []).append(ent)
    # sorted() works whether keys() returns a list or a view, unlike calling
    # .sort() on the result of keys().
    sections = [(htmlResult[n], result[n]) for n in sorted(result)]
    return sections, url_map

## def sortEntriesByAuthor(entries):
##     tmp = []
##     i = 0
##     for ent in entries:
##         i += 1
##         authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
##                     for a in ent.parsedAuthor ]
##         tmp.append((tuple(authors), i, ent))
##     tmp.sort()
##     return [ t[2] for t in tmp ]

def sortEntriesByDate(entries):
    """Sort a list of entries by their publication date; return a new list.

       Each entry is ranked by year*13 + month-number, where the month number
       is the entry's position in MONTHS (0 when the entry has no month or
       the month is not recognised).  For a month range such as
       "June--July" the first month is used.

       Special cases:
         - entries whose month or year is "forthcoming" sort as year 20000;
         - entries with a missing or non-integer year sort as year 10000.
       Entries with the same rank keep their input order.

       A message is printed for an unrecognised month and for a missing
       year field; a non-integer year is handled silently.
    """
    tmp = []
    for i, ent in enumerate(entries, 1):
        if (ent.get('month') == "forthcoming" or
            ent.get('year') == "forthcoming"):
            tmp.append((20000*13, i, ent))
            continue

        monthname = ent.get("month")
        if monthname is not None:
            match = re.match(r"(\w+)--\w+", monthname)
            if match:
                monthname = match.group(1)
        try:
            # A missing month (None) matches MONTHS[0] and so yields 0
            # without triggering the warning below.
            mon = MONTHS.index(monthname)
        except ValueError:
            # Single-argument print(...) behaves the same as a statement
            # and as a function call.
            print("Unknown month %r in %s" % (ent.get("month"), ent.key))
            mon = 0

        try:
            date = int(ent['year'])*13 + mon
        except KeyError:
            print("ERROR: No year field in %s" % ent.key)
            date = 10000*13
        except ValueError:
            # Year present but not an integer: same placeholder rank, but
            # no message is printed.
            date = 10000*13
        tmp.append((date, i, ent))
    # 'i' is unique per entry, so the comparison never reaches the entry
    # object itself and ties on date resolve to input order.
    tmp.sort()
    return [t[2] for t in tmp]