diff options
| author | Nick Mathewson <nickm@torproject.org> | 2003-05-23 02:38:55 +0000 |
|---|---|---|
| committer | Nick Mathewson <nickm@torproject.org> | 2003-05-23 02:38:55 +0000 |
| commit | a5d4c56336ec216006fe830f630b7de5dde5c67a (patch) | |
| tree | 4fe39099c345876d29d8aeb4afca07101c2a6d5d | |
| parent | 4c84657cc6a6cffab2e322c70423e4d3b7977ea9 (diff) | |
| download | anonbib-a5d4c56336ec216006fe830f630b7de5dde5c67a.tar.gz | |
Collapse authors with similar names; make output pass XHTML/CSS validators.
Also some author refactoring.
svn:r30
| -rw-r--r-- | BibTeX.py | 217 | ||||
| -rw-r--r-- | TODO | 10 | ||||
| -rw-r--r-- | _template_.html | 36 | ||||
| -rw-r--r-- | _template_bibtex.html | 26 | ||||
| -rw-r--r-- | config.py | 11 | ||||
| -rw-r--r-- | css/main.css | 4 | ||||
| -rw-r--r-- | css/pubs.css | 10 | ||||
| -rw-r--r-- | writeHTML.py | 8 |
8 files changed, 239 insertions, 83 deletions
@@ -20,9 +20,10 @@ WWW_FIELDS = [ 'www_section', 'www_important', 'www_remarks', 'www_txt_url', 'www_ps_gz_url' ] def url_untranslate(s): - s = s.replace(" ", "+") - s = re.sub(r'([%<>])', - lambda m: "%%%02x"%ord(m.group(1)), + #s = s.replace(" ", "_") + #s = s.replace(',', "_") + s = re.sub(r'([%<>, _])', + lambda m: "_%02x"%ord(m.group(1)), s) return s @@ -64,6 +65,40 @@ class BibTeX: newEntries.append(ent) self.entries = newEntries +def buildAuthorTable(entries): + + authorsByLast = {} + for e in entries: + for a in e.parsedAuthor: + authorsByLast.setdefault(tuple(a.last), []).append(a) + # map from author to collapsed author. + result = {} + for e in entries: + for author in e.parsedAuthor: + if result.has_key(author): + continue + + c = author + for a in authorsByLast[tuple(author.last)]: + if a is author: + continue + c = c.collapsesTo(a) + result[author] = c + + if 1: + for a,c in result.items(): + if a != c: + print "Collapsing authors: %s => %s" % (a,c) + if 0: + print parseAuthor("Franz Kaashoek")[0].collapsesTo( + parseAuthor("M. Franz Kaashoek")[0]) + print parseAuthor("Paul F. Syverson")[0].collapsesTo( + parseAuthor("Paul Syverson")[0]) + print parseAuthor("Paul Syverson")[0].collapsesTo( + parseAuthor("Paul F. Syverson")[0]) + + return result + def splitEntriesBy(entries, field): result = {} for ent in entries: @@ -90,30 +125,28 @@ def splitSortedEntriesBy(entries, field): def sortEntriesBy(entries, field, default): tmp = [] + i = 0 for ent in entries: + i += 1 v = ent.get(field, default) if v.startswith("<span class='bad'>"): v = default - tmp.append((txtize(v), ent)) + tmp.append((txtize(v), i, ent)) tmp.sort() - return [ t[1] for t in tmp ] + return [ t[2] for t in tmp ] def splitEntriesByAuthor(entries): + collapsedAuthors = buildAuthorTable(entries) entries = sortEntriesByDate(entries) result = {} # Name in sorting order -> entries htmlResult = {} # name in sorting order -> Full name url_map = {} # Full name -> Url for ent in entries: for a in ent.parsedAuthor: - sortkey = txtize(" ".join(a.von+a.last+a.first+a.jr)) - url = author_url(" ".join(a.first+a.von+a.last+a.jr)) - secname = " ".join(a.last) - more = a.first+a.von - if more: - secname += ", "+" ".join(more) - if a.jr: - secname += ", "+" ".join(a.jr) - secname = htmlize(secname) + canonical = collapsedAuthors[a] + url = canonical.getHomepage() + sortkey = canonical.getSortingName() + secname = canonical.getSectionName() if url: url_map[secname] = url @@ -126,16 +159,20 @@ def splitEntriesByAuthor(entries): def sortEntriesByAuthor(entries): tmp = [] + i = 0 for ent in entries: + i += 1 authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr)) for a in ent.parsedAuthor ] - tmp.append((tuple(authors), ent)) + tmp.append((tuple(authors), i, ent)) tmp.sort() - return [ t[1] for t in tmp ] + return [ t[2] for t in tmp ] def sortEntriesByDate(entries): tmp = [] + i = 0 for ent in entries: + i += 1 try: mon = MONTHS.index(ent.get("month")) except ValueError: @@ -149,9 +186,9 @@ def sortEntriesByDate(entries): date = 10000*13 except ValueError: date = 10000*13 - tmp.append((date, ent)) + tmp.append((date, i, ent)) tmp.sort() - return [ t[1] for t in tmp ] + return [ t[2] for t in tmp ] DISPLAYED_FIELDS = [ 'title', 'author', 'journal', 'booktitle', @@ -365,11 +402,6 @@ class BibTeXEntry: else: res = ["<li><p class='entry'><span class='title'>%s</span>"%( htmlize(self['title']))] - - #eclass = ["entry", "impEntry"][imp] - # - #res = ["<li><p class='%s'><span class='title'>%s</span>"%( - # eclass, htmlize(self['title']))] availability = [] for key, name in (('www_abstract_url', 'abstract'), @@ -385,10 +417,10 @@ class BibTeXEntry: res.append(" <span class='availability'>(") res.append(", ".join(availability)) res.append(")</span>") - res.append("<br><span class='author'>by ") + res.append("<br /><span class='author'>by ") #res.append("\n<!-- %r -->\n" % self.parsedAuthor) - htmlAuthors = [ htmlize_author(a) for a in self.parsedAuthor ] + htmlAuthors = [ a.htmlizeWithLink() for a in self.parsedAuthor ] if len(htmlAuthors) == 1: res.append(htmlAuthors[0]) @@ -401,7 +433,7 @@ class BibTeXEntry: if res[-1][-1] != '.': res.append(".") - res.append("</span><br>\n") + res.append("</span><br />\n") res.append(self.biblio_to_html()) res.append("</p>"), @@ -440,19 +472,6 @@ def htmlize(s): s = s.replace("--", "–"); return s -def htmlize_author(author): - f,v,l,j = author.first,author.von,author.last,author.jr - a = " ".join(f+v+l) - if j: - a = "%s, %s" %(a,j) - a = htmlize(a) - u = author_url(a) - if u: - return "<a href='%s'>%s</a>"%(u,a) - else: - return a - return a - def author_url(author): for pat, url in config.AUTHOR_RE_LIST: if pat.search(author): @@ -465,24 +484,137 @@ def txtize(s): s = RE_TEX_CMD.sub("", s) s = s.translate(ALLCHARS, "{}") return s - PROCEEDINGS_RE = re.compile( r'((?:proceedings|workshop record) of(?: the)? )(.*)', re.I) - class ParsedAuthor: def __init__(self, first, von, last, jr): self.first = first self.von = von self.last = last self.jr = jr + self.collapsable = 1 + s = htmlize(str(self)) + for pat in config.NO_COLLAPSE_AUTHORS_RE_LIST: + if pat.search(s): + self.collapsable = 0 + break + + def __eq__(self, o): + return ((self.first == o.first) and + (self.last == o.last) and + (self.von == o.von) and + (self.jr == o.jr)) + + def __neq__(self, o): + return ((self.first != o.first) or + (self.last != o.last) or + (self.von != o.von) or + (self.jr != o.jr)) + + def __hash__(self): + return hash(repr(self)) + + def collapsesTo(self, o): + if not self.collapsable or not o.collapsable: + return self + + if self.last != o.last or self.von != o.von or self.jr != o.jr: + return self + if not self.first: + return o + + if len(self.first) == len(o.first): + n = [] + for a,b in zip(self.first, o.first): + if a == b: + n.append(a) + elif len(a) == 2 and a[1] == '.' and a[0] == b[0]: + n.append(b) + elif len(b) == 2 and b[1] == '.' and a[0] == b[0]: + n.append(a) + else: + return self + if n == self.first: + return self + elif n == o.first: + return o + else: + return self + else: + realname = max([len(n) for n in self.first+o.first])>2 + if not realname: + return self + + if len(self.first) < len(o.first): + short = self.first; long = o.first + else: + short = o.first; long = self.first + + initials_s = "".join([n[0] for n in short]) + initials_l = "".join([n[0] for n in long]) + idx = initials_l.find(initials_s) + if idx < 0: + return self + n = long[:idx] + for i in range(idx, idx+len(short)): + a = long[i]; b = short[i-idx] + if a == b: + n.append(a) + elif len(a) == 2 and a[1] == '.' and a[0] == b[0]: + n.append(b) + elif len(b) == 2 and b[1] == '.' and a[0] == b[0]: + n.append(a) + else: + return self + n += long[idx+len(short):] + + if n == self.first: + return self + elif n == o.first: + return o + else: + return self + def __repr__(self): return "ParsedAuthor(%r,%r,%r,%r)"%(self.first,self.von, self.last,self.jr) def __str__(self): - return " ".join(self.first+self.von+self.last+self.jr) + a = " ".join(self.first+self.von+self.last) + if self.jr: + return "%s, %s" % (a,self.jr) + return a + + def getHomepage(self): + s = htmlize(str(self)) + for pat, url in config.AUTHOR_RE_LIST: + if pat.search(str(self)): + return url + return None + + def getSortingName(self): + return txtize(" ".join(self.von+self.last+self.first+self.jr)) + + def getSectionName(self): + secname = " ".join(self.last) + more = self.first+self.von + if more: + secname += ", "+" ".join(more) + if self.jr: + secname += ", "+" ".join(self.jr) + secname = htmlize(secname) + return secname + + def htmlizeWithLink(self): + a = str(self) + a = htmlize(a) + u = self.getHomepage() + if u: + return "<a href='%s'>%s</a>"%(u,a) + else: + return a def _split(s,w=79,indent=8): r = [] @@ -522,7 +654,6 @@ class FileIter: def next(self): self.lineno += 1 return self._next() - def parseAuthor(s): items = [] @@ -28,9 +28,11 @@ Next: - Also clean \_ to _ and back - Look for urls in wherepublished. - Forgive newlines in wherepublished, note. - - "Systems, Zero Knowledge"? - - When sorting by author, is Paul Syverson different from Paul - F. Syverson? - - What the heck is the algorithm for sorting within a year, + D "Systems, Zero Knowledge"? + - Make CSS and HTML pass the validator + o When sorting by author, make "Paul F. Syverson" the same person + as "Paul Syverson" unless somebody says different. + - When sorting within a year +What the heck is the algorithm for sorting within a year, when sorting by date? What should it be? diff --git a/_template_.html b/_template_.html index 473fe4c..a3c7d28 100644 --- a/_template_.html +++ b/_template_.html @@ -1,22 +1,25 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN"> -<html><head> -<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> -<meta http-equiv="Content-Style-Type" content="text/css"> +<?xml version="1.0"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta http-equiv="Content-Style-Type" content="text/css" /> <!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME! - -- *** EDIT THE .bib FILE or _template_.html INSTEAD! - -- - -- Generated by `%(command_line)s' - -- (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 --> + *** EDIT THE .bib FILE or _template_.html INSTEAD! + + Generated by `%(command_line)s' + (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 --> <title>Anonymity Bibliography</title> -<link rel="stylesheet" type="text/css" href="./css/main.css"> -<link rel="stylesheet" type="text/css" href="./css/pubs.css"> +<link rel="stylesheet" type="text/css" href="./css/main.css" /> +<link rel="stylesheet" type="text/css" href="./css/pubs.css" /> </head> <body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099" -alink="#ff9900" marginheight="0" marginwidth="0"> +alink="#ff9900" > <h1 align="center">Anonymity bibliography</h1> <p align="center">%(choices)s</p> @@ -29,11 +32,10 @@ alink="#ff9900" marginheight="0" marginwidth="0"> <!-- Table 2: The sidebar--> <table align="right" cellspacing="0" cellpadding="5" width="100" class="sidebar"> -<tr valign="top"><td><p -class="l1"><strong>%(sectiontypes)s:</strong><br> +<tr valign="top"><td><p class="l1"><strong>%(sectiontypes)s:</strong><br /></p> %(sections)s -</p></td> - +</td> +</tr> </table><!-- End of table 2 --> </td> @@ -41,13 +43,13 @@ class="l1"><strong>%(sectiontypes)s:</strong><br> <h2>Publications by %(field)s</h2> -<ul> +<ul class="sections"> %(entries)s </ul> </td> -<td width="5%%"><br></td> +<td width="5%%"><br /></td> </tr> </table><!-- End of table 1 --> diff --git a/_template_bibtex.html b/_template_bibtex.html index 82246bd..d5bad7e 100644 --- a/_template_bibtex.html +++ b/_template_bibtex.html @@ -1,24 +1,28 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN"> -<html><head> -<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> -<meta http-equiv="Content-Style-Type" content="text/css"> +<?xml version="1.0"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /> +<meta http-equiv="Content-Style-Type" content="text/css" /> <!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME! - -- *** EDIT THE .bib FILE or _template_.html INSTEAD! - -- - -- Generated by `%(command_line)s' - -- (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 --> + *** EDIT THE .bib FILE or _template_.html INSTEAD! + + Generated by `%(command_line)s' + (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 --> <title>Anonymity Bibliography: BibTeX</title> -<link rel="stylesheet" type="text/css" href="./css/main.css"> -<link rel="stylesheet" type="text/css" href="./css/pubs.css"> +<link rel="stylesheet" type="text/css" href="./css/main.css" /> +<link rel="stylesheet" type="text/css" href="./css/pubs.css" /> </head> <body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099" - alink="#ff9900" marginheight="0" marginwidth="0"> + alink="#ff9900" > <table cellspacing="15" border="0" align="center" width="100%%"> %(entries)s </table> +</body> </html> @@ -29,6 +29,13 @@ AUTHOR_URLS = { } +# List of paterns for author names _not_ to do an initial-tolerant +# match on when building section list. E.g., if "J\\. Smith" is in +# this list, he won't be folded into "John Smith". +NO_COLLAPSE_AUTHORS = [ + +] + INITIAL_STRINGS = { # MONTHS 'jan' : 'January', 'feb' : 'February', @@ -55,3 +62,7 @@ OMIT_ENTRIES = ("proceedings", "journal") AUTHOR_RE_LIST = [ (re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items() ] + +NO_COLLAPSE_AUTHORS_RE_LIST = [ + re.compile(pat, re.I) for pat in NO_COLLAPSE_AUTHORS + ] diff --git a/css/main.css b/css/main.css index 8b336c9..b14c394 100644 --- a/css/main.css +++ b/css/main.css @@ -49,6 +49,10 @@ UL.expand { margin-bottom: 1em; } +UL.sections { + list-style: none; +} + /* Font-level properties */ PRE { diff --git a/css/pubs.css b/css/pubs.css index 57b654f..07d7a37 100644 --- a/css/pubs.css +++ b/css/pubs.css @@ -29,6 +29,7 @@ SPAN.biblio A { SPAN.bad { text-decoration: underline; + color: #000; background-color: #FDF; } @@ -38,7 +39,7 @@ P.remarks { margin-bottom: 0; margin-left: 5em; padding-left: 0.5em; - border-width: 0 0 0 5; + border-width: 0 0 0 5px; border-color: black; border-style: solid; } @@ -63,7 +64,7 @@ P.entry { } DIV.impEntry { - border-width: 0.1; + border-width: 1px; border-color: black; border-style: solid; background-color: #FFE; @@ -80,7 +81,7 @@ P.impEntry { } TABLE.sidebar { - border-width: 2; + border-width: 2px; border-color: black; border-style: solid; background-color: #CFF; @@ -88,7 +89,7 @@ TABLE.sidebar { TD.bibtex { font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace; - border-width: 2; + border-width: 2px; font-weight: normal; border-color: black; border-style: solid; @@ -99,3 +100,4 @@ PRE.bibtex { font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace; font-size: smaller; } + diff --git a/writeHTML.py b/writeHTML.py index 019795f..e5d0816 100644 --- a/writeHTML.py +++ b/writeHTML.py @@ -25,15 +25,15 @@ def writeBody(f, sections, section_urls): sDisp = re.sub(r'\s+', ' ', s.strip()) sDisp = sDisp.replace(" ", " ") if u: - print >>f, ('<h3><a name="%s"><a href="%s">%s</a></a></h3>'%( + print >>f, ('<li><h3><a name="%s"></a><a href="%s">%s</a></h3>'%( (BibTeX.url_untranslate(s), u, sDisp))) else: - print >>f, ('<h3><a name="%s">%s</a></h3>'%( + print >>f, ('<li><h3><a name="%s">%s</a></h3>'%( BibTeX.url_untranslate(s),sDisp)) print >>f, "<ul class='expand'>" for e in entries: print >>f, e.to_html() - print >>f, "</ul>" + print >>f, "</ul></li>" def writeHTML(f, sections, sectionType, fieldName, choices, section_urls={}): """sections: list of (sectionname, [list of BibTeXEntry])''' @@ -57,7 +57,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices, section_urls={}): else: choiceStr.append(choice) - choiceStr = "<p align='center'>%s</p>" % (" | ".join(choiceStr)) + choiceStr = (" | ".join(choiceStr)) fields = { 'command_line' : "", 'sectiontypes' : sectionType, |
