about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nick Mathewson <nickm@torproject.org> 2003-05-23 02:38:55 +0000
committer: Nick Mathewson <nickm@torproject.org> 2003-05-23 02:38:55 +0000
commit: a5d4c56336ec216006fe830f630b7de5dde5c67a (patch)
tree: 4fe39099c345876d29d8aeb4afca07101c2a6d5d
parent: 4c84657cc6a6cffab2e322c70423e4d3b7977ea9 (diff)
download: anonbib-a5d4c56336ec216006fe830f630b7de5dde5c67a.tar.gz
Collapse authors with similar names; make output pass XHTML/CSS validators.
Also some author refactoring. svn:r30
-rw-r--r-- BibTeX.py 217
-rw-r--r-- TODO 10
-rw-r--r-- _template_.html 36
-rw-r--r-- _template_bibtex.html 26
-rw-r--r-- config.py 11
-rw-r--r-- css/main.css 4
-rw-r--r-- css/pubs.css 10
-rw-r--r-- writeHTML.py 8
8 files changed, 239 insertions, 83 deletions
diff --git a/BibTeX.py b/BibTeX.py
index 2e61453..bf849bd 100644
--- a/BibTeX.py
+++ b/BibTeX.py
@@ -20,9 +20,10 @@ WWW_FIELDS = [ 'www_section', 'www_important', 'www_remarks',
'www_txt_url', 'www_ps_gz_url' ]
def url_untranslate(s):
- s = s.replace(" ", "+")
- s = re.sub(r'([%<>])',
- lambda m: "%%%02x"%ord(m.group(1)),
+ #s = s.replace(" ", "_")
+ #s = s.replace(',', "_")
+ s = re.sub(r'([%<>, _])',
+ lambda m: "_%02x"%ord(m.group(1)),
s)
return s
@@ -64,6 +65,40 @@ class BibTeX:
newEntries.append(ent)
self.entries = newEntries
+def buildAuthorTable(entries):
+
+ authorsByLast = {}
+ for e in entries:
+ for a in e.parsedAuthor:
+ authorsByLast.setdefault(tuple(a.last), []).append(a)
+ # map from author to collapsed author.
+ result = {}
+ for e in entries:
+ for author in e.parsedAuthor:
+ if result.has_key(author):
+ continue
+
+ c = author
+ for a in authorsByLast[tuple(author.last)]:
+ if a is author:
+ continue
+ c = c.collapsesTo(a)
+ result[author] = c
+
+ if 1:
+ for a,c in result.items():
+ if a != c:
+ print "Collapsing authors: %s => %s" % (a,c)
+ if 0:
+ print parseAuthor("Franz Kaashoek")[0].collapsesTo(
+ parseAuthor("M. Franz Kaashoek")[0])
+ print parseAuthor("Paul F. Syverson")[0].collapsesTo(
+ parseAuthor("Paul Syverson")[0])
+ print parseAuthor("Paul Syverson")[0].collapsesTo(
+ parseAuthor("Paul F. Syverson")[0])
+
+ return result
+
def splitEntriesBy(entries, field):
result = {}
for ent in entries:
@@ -90,30 +125,28 @@ def splitSortedEntriesBy(entries, field):
def sortEntriesBy(entries, field, default):
tmp = []
+ i = 0
for ent in entries:
+ i += 1
v = ent.get(field, default)
if v.startswith("<span class='bad'>"):
v = default
- tmp.append((txtize(v), ent))
+ tmp.append((txtize(v), i, ent))
tmp.sort()
- return [ t[1] for t in tmp ]
+ return [ t[2] for t in tmp ]
def splitEntriesByAuthor(entries):
+ collapsedAuthors = buildAuthorTable(entries)
entries = sortEntriesByDate(entries)
result = {} # Name in sorting order -> entries
htmlResult = {} # name in sorting order -> Full name
url_map = {} # Full name -> Url
for ent in entries:
for a in ent.parsedAuthor:
- sortkey = txtize(" ".join(a.von+a.last+a.first+a.jr))
- url = author_url(" ".join(a.first+a.von+a.last+a.jr))
- secname = " ".join(a.last)
- more = a.first+a.von
- if more:
- secname += ", "+" ".join(more)
- if a.jr:
- secname += ", "+" ".join(a.jr)
- secname = htmlize(secname)
+ canonical = collapsedAuthors[a]
+ url = canonical.getHomepage()
+ sortkey = canonical.getSortingName()
+ secname = canonical.getSectionName()
if url:
url_map[secname] = url
@@ -126,16 +159,20 @@ def splitEntriesByAuthor(entries):
def sortEntriesByAuthor(entries):
tmp = []
+ i = 0
for ent in entries:
+ i += 1
authors = [ txtize(" ".join(a.von+a.last+a.first+a.jr))
for a in ent.parsedAuthor ]
- tmp.append((tuple(authors), ent))
+ tmp.append((tuple(authors), i, ent))
tmp.sort()
- return [ t[1] for t in tmp ]
+ return [ t[2] for t in tmp ]
def sortEntriesByDate(entries):
tmp = []
+ i = 0
for ent in entries:
+ i += 1
try:
mon = MONTHS.index(ent.get("month"))
except ValueError:
@@ -149,9 +186,9 @@ def sortEntriesByDate(entries):
date = 10000*13
except ValueError:
date = 10000*13
- tmp.append((date, ent))
+ tmp.append((date, i, ent))
tmp.sort()
- return [ t[1] for t in tmp ]
+ return [ t[2] for t in tmp ]
DISPLAYED_FIELDS = [ 'title', 'author', 'journal', 'booktitle',
@@ -365,11 +402,6 @@ class BibTeXEntry:
else:
res = ["<li><p class='entry'><span class='title'>%s</span>"%(
htmlize(self['title']))]
-
- #eclass = ["entry", "impEntry"][imp]
- #
- #res = ["<li><p class='%s'><span class='title'>%s</span>"%(
- # eclass, htmlize(self['title']))]
availability = []
for key, name in (('www_abstract_url', 'abstract'),
@@ -385,10 +417,10 @@ class BibTeXEntry:
res.append(" <span class='availability'>(")
res.append(",&nbsp;".join(availability))
res.append(")</span>")
- res.append("<br><span class='author'>by ")
+ res.append("<br /><span class='author'>by ")
#res.append("\n<!-- %r -->\n" % self.parsedAuthor)
- htmlAuthors = [ htmlize_author(a) for a in self.parsedAuthor ]
+ htmlAuthors = [ a.htmlizeWithLink() for a in self.parsedAuthor ]
if len(htmlAuthors) == 1:
res.append(htmlAuthors[0])
@@ -401,7 +433,7 @@ class BibTeXEntry:
if res[-1][-1] != '.':
res.append(".")
- res.append("</span><br>\n")
+ res.append("</span><br />\n")
res.append(self.biblio_to_html())
res.append("</p>"),
@@ -440,19 +472,6 @@ def htmlize(s):
s = s.replace("--", "&ndash;");
return s
-def htmlize_author(author):
- f,v,l,j = author.first,author.von,author.last,author.jr
- a = " ".join(f+v+l)
- if j:
- a = "%s, %s" %(a,j)
- a = htmlize(a)
- u = author_url(a)
- if u:
- return "<a href='%s'>%s</a>"%(u,a)
- else:
- return a
- return a
-
def author_url(author):
for pat, url in config.AUTHOR_RE_LIST:
if pat.search(author):
@@ -465,24 +484,137 @@ def txtize(s):
s = RE_TEX_CMD.sub("", s)
s = s.translate(ALLCHARS, "{}")
return s
-
PROCEEDINGS_RE = re.compile(
r'((?:proceedings|workshop record) of(?: the)? )(.*)',
re.I)
-
class ParsedAuthor:
def __init__(self, first, von, last, jr):
self.first = first
self.von = von
self.last = last
self.jr = jr
+ self.collapsable = 1
+ s = htmlize(str(self))
+ for pat in config.NO_COLLAPSE_AUTHORS_RE_LIST:
+ if pat.search(s):
+ self.collapsable = 0
+ break
+
+ def __eq__(self, o):
+ return ((self.first == o.first) and
+ (self.last == o.last) and
+ (self.von == o.von) and
+ (self.jr == o.jr))
+
+ def __neq__(self, o):
+ return ((self.first != o.first) or
+ (self.last != o.last) or
+ (self.von != o.von) or
+ (self.jr != o.jr))
+
+ def __hash__(self):
+ return hash(repr(self))
+
+ def collapsesTo(self, o):
+ if not self.collapsable or not o.collapsable:
+ return self
+
+ if self.last != o.last or self.von != o.von or self.jr != o.jr:
+ return self
+ if not self.first:
+ return o
+
+ if len(self.first) == len(o.first):
+ n = []
+ for a,b in zip(self.first, o.first):
+ if a == b:
+ n.append(a)
+ elif len(a) == 2 and a[1] == '.' and a[0] == b[0]:
+ n.append(b)
+ elif len(b) == 2 and b[1] == '.' and a[0] == b[0]:
+ n.append(a)
+ else:
+ return self
+ if n == self.first:
+ return self
+ elif n == o.first:
+ return o
+ else:
+ return self
+ else:
+ realname = max([len(n) for n in self.first+o.first])>2
+ if not realname:
+ return self
+
+ if len(self.first) < len(o.first):
+ short = self.first; long = o.first
+ else:
+ short = o.first; long = self.first
+
+ initials_s = "".join([n[0] for n in short])
+ initials_l = "".join([n[0] for n in long])
+ idx = initials_l.find(initials_s)
+ if idx < 0:
+ return self
+ n = long[:idx]
+ for i in range(idx, idx+len(short)):
+ a = long[i]; b = short[i-idx]
+ if a == b:
+ n.append(a)
+ elif len(a) == 2 and a[1] == '.' and a[0] == b[0]:
+ n.append(b)
+ elif len(b) == 2 and b[1] == '.' and a[0] == b[0]:
+ n.append(a)
+ else:
+ return self
+ n += long[idx+len(short):]
+
+ if n == self.first:
+ return self
+ elif n == o.first:
+ return o
+ else:
+ return self
+
def __repr__(self):
return "ParsedAuthor(%r,%r,%r,%r)"%(self.first,self.von,
self.last,self.jr)
def __str__(self):
- return " ".join(self.first+self.von+self.last+self.jr)
+ a = " ".join(self.first+self.von+self.last)
+ if self.jr:
+ return "%s, %s" % (a,self.jr)
+ return a
+
+ def getHomepage(self):
+ s = htmlize(str(self))
+ for pat, url in config.AUTHOR_RE_LIST:
+ if pat.search(str(self)):
+ return url
+ return None
+
+ def getSortingName(self):
+ return txtize(" ".join(self.von+self.last+self.first+self.jr))
+
+ def getSectionName(self):
+ secname = " ".join(self.last)
+ more = self.first+self.von
+ if more:
+ secname += ", "+" ".join(more)
+ if self.jr:
+ secname += ", "+" ".join(self.jr)
+ secname = htmlize(secname)
+ return secname
+
+ def htmlizeWithLink(self):
+ a = str(self)
+ a = htmlize(a)
+ u = self.getHomepage()
+ if u:
+ return "<a href='%s'>%s</a>"%(u,a)
+ else:
+ return a
def _split(s,w=79,indent=8):
r = []
@@ -522,7 +654,6 @@ class FileIter:
def next(self):
self.lineno += 1
return self._next()
-
def parseAuthor(s):
items = []
diff --git a/TODO b/TODO
index 68a9b6a..914e61a 100644
--- a/TODO
+++ b/TODO
@@ -28,9 +28,11 @@ Next:
- Also clean \_ to _ and back
- Look for urls in wherepublished.
- Forgive newlines in wherepublished, note.
- - "Systems, Zero Knowledge"?
- - When sorting by author, is Paul Syverson different from Paul
- F. Syverson?
- - What the heck is the algorithm for sorting within a year,
+ D "Systems, Zero Knowledge"?
+ - Make CSS and HTML pass the validator
+ o When sorting by author, make "Paul F. Syverson" the same person
+ as "Paul Syverson" unless somebody says different.
+ - When sorting within a year
+What the heck is the algorithm for sorting within a year,
when sorting by date? What should it be?
diff --git a/_template_.html b/_template_.html
index 473fe4c..a3c7d28 100644
--- a/_template_.html
+++ b/_template_.html
@@ -1,22 +1,25 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
-<html><head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<meta http-equiv="Content-Style-Type" content="text/css">
+<?xml version="1.0"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta http-equiv="Content-Style-Type" content="text/css" />
<!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME!
- -- *** EDIT THE .bib FILE or _template_.html INSTEAD!
- --
- -- Generated by `%(command_line)s'
- -- (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->
+ *** EDIT THE .bib FILE or _template_.html INSTEAD!
+
+ Generated by `%(command_line)s'
+ (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->
<title>Anonymity Bibliography</title>
-<link rel="stylesheet" type="text/css" href="./css/main.css">
-<link rel="stylesheet" type="text/css" href="./css/pubs.css">
+<link rel="stylesheet" type="text/css" href="./css/main.css" />
+<link rel="stylesheet" type="text/css" href="./css/pubs.css" />
</head>
<body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099"
-alink="#ff9900" marginheight="0" marginwidth="0">
+alink="#ff9900" >
<h1 align="center">Anonymity bibliography</h1>
<p align="center">%(choices)s</p>
@@ -29,11 +32,10 @@ alink="#ff9900" marginheight="0" marginwidth="0">
<!-- Table 2: The sidebar-->
<table align="right" cellspacing="0" cellpadding="5" width="100"
class="sidebar">
-<tr valign="top"><td><p
-class="l1"><strong>%(sectiontypes)s:</strong><br>
+<tr valign="top"><td><p class="l1"><strong>%(sectiontypes)s:</strong><br /></p>
%(sections)s
-</p></td>
-
+</td>
+</tr>
</table><!-- End of table 2 -->
</td>
@@ -41,13 +43,13 @@ class="l1"><strong>%(sectiontypes)s:</strong><br>
<h2>Publications by %(field)s</h2>
-<ul>
+<ul class="sections">
%(entries)s
</ul>
</td>
-<td width="5%%"><br></td>
+<td width="5%%"><br /></td>
</tr>
</table><!-- End of table 1 -->
diff --git a/_template_bibtex.html b/_template_bibtex.html
index 82246bd..d5bad7e 100644
--- a/_template_bibtex.html
+++ b/_template_bibtex.html
@@ -1,24 +1,28 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
-<html><head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<meta http-equiv="Content-Style-Type" content="text/css">
+<?xml version="1.0"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+<meta http-equiv="Content-Style-Type" content="text/css" />
<!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME!
- -- *** EDIT THE .bib FILE or _template_.html INSTEAD!
- --
- -- Generated by `%(command_line)s'
- -- (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->
+ *** EDIT THE .bib FILE or _template_.html INSTEAD!
+
+ Generated by `%(command_line)s'
+ (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->
<title>Anonymity Bibliography: BibTeX</title>
-<link rel="stylesheet" type="text/css" href="./css/main.css">
-<link rel="stylesheet" type="text/css" href="./css/pubs.css">
+<link rel="stylesheet" type="text/css" href="./css/main.css" />
+<link rel="stylesheet" type="text/css" href="./css/pubs.css" />
</head>
<body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099"
- alink="#ff9900" marginheight="0" marginwidth="0">
+ alink="#ff9900" >
<table cellspacing="15" border="0" align="center" width="100%%">
%(entries)s
</table>
+</body>
</html>
diff --git a/config.py b/config.py
index 8de6c96..e6eacda 100644
--- a/config.py
+++ b/config.py
@@ -29,6 +29,13 @@ AUTHOR_URLS = {
}
+# List of patterns for author names _not_ to do an initial-tolerant
+# match on when building section list. E.g., if "J\\. Smith" is in
+# this list, he won't be folded into "John Smith".
+NO_COLLAPSE_AUTHORS = [
+
+]
+
INITIAL_STRINGS = {
# MONTHS
'jan' : 'January', 'feb' : 'February',
@@ -55,3 +62,7 @@ OMIT_ENTRIES = ("proceedings", "journal")
AUTHOR_RE_LIST = [
(re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items()
]
+
+NO_COLLAPSE_AUTHORS_RE_LIST = [
+ re.compile(pat, re.I) for pat in NO_COLLAPSE_AUTHORS
+ ]
diff --git a/css/main.css b/css/main.css
index 8b336c9..b14c394 100644
--- a/css/main.css
+++ b/css/main.css
@@ -49,6 +49,10 @@ UL.expand {
margin-bottom: 1em;
}
+UL.sections {
+ list-style: none;
+}
+
/* Font-level properties */
PRE {
diff --git a/css/pubs.css b/css/pubs.css
index 57b654f..07d7a37 100644
--- a/css/pubs.css
+++ b/css/pubs.css
@@ -29,6 +29,7 @@ SPAN.biblio A {
SPAN.bad {
text-decoration: underline;
+ color: #000;
background-color: #FDF;
}
@@ -38,7 +39,7 @@ P.remarks {
margin-bottom: 0;
margin-left: 5em;
padding-left: 0.5em;
- border-width: 0 0 0 5;
+ border-width: 0 0 0 5px;
border-color: black;
border-style: solid;
}
@@ -63,7 +64,7 @@ P.entry {
}
DIV.impEntry {
- border-width: 0.1;
+ border-width: 1px;
border-color: black;
border-style: solid;
background-color: #FFE;
@@ -80,7 +81,7 @@ P.impEntry {
}
TABLE.sidebar {
- border-width: 2;
+ border-width: 2px;
border-color: black;
border-style: solid;
background-color: #CFF;
@@ -88,7 +89,7 @@ TABLE.sidebar {
TD.bibtex {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
- border-width: 2;
+ border-width: 2px;
font-weight: normal;
border-color: black;
border-style: solid;
@@ -99,3 +100,4 @@ PRE.bibtex {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
font-size: smaller;
}
+
diff --git a/writeHTML.py b/writeHTML.py
index 019795f..e5d0816 100644
--- a/writeHTML.py
+++ b/writeHTML.py
@@ -25,15 +25,15 @@ def writeBody(f, sections, section_urls):
sDisp = re.sub(r'\s+', ' ', s.strip())
sDisp = sDisp.replace(" ", "&nbsp;")
if u:
- print >>f, ('<h3><a name="%s"><a href="%s">%s</a></a></h3>'%(
+ print >>f, ('<li><h3><a name="%s"></a><a href="%s">%s</a></h3>'%(
(BibTeX.url_untranslate(s), u, sDisp)))
else:
- print >>f, ('<h3><a name="%s">%s</a></h3>'%(
+ print >>f, ('<li><h3><a name="%s">%s</a></h3>'%(
BibTeX.url_untranslate(s),sDisp))
print >>f, "<ul class='expand'>"
for e in entries:
print >>f, e.to_html()
- print >>f, "</ul>"
+ print >>f, "</ul></li>"
def writeHTML(f, sections, sectionType, fieldName, choices, section_urls={}):
"""sections: list of (sectionname, [list of BibTeXEntry])'''
@@ -57,7 +57,7 @@ def writeHTML(f, sections, sectionType, fieldName, choices, section_urls={}):
else:
choiceStr.append(choice)
- choiceStr = "<p align='center'>%s</p>" % (" | ".join(choiceStr))
+ choiceStr = ("&nbsp;|&nbsp;".join(choiceStr))
fields = { 'command_line' : "",
'sectiontypes' : sectionType,