aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--BibTeX.py55
-rw-r--r--Makefile6
-rw-r--r--TODO4
-rw-r--r--_template_bibtex.html2
-rw-r--r--config.py132
-rw-r--r--metaphone.py1
-rw-r--r--updateCache.py29
-rw-r--r--writeHTML.py13
8 files changed, 100 insertions, 142 deletions
diff --git a/BibTeX.py b/BibTeX.py
index 5f71708..1637ff0 100644
--- a/BibTeX.py
+++ b/BibTeX.py
@@ -26,13 +26,11 @@ MONTHS = [ None,
# recognize them.)
WWW_FIELDS = [ 'www_section', 'www_important', 'www_remarks',
'www_abstract_url', 'www_html_url', 'www_pdf_url', 'www_ps_url',
- 'www_txt_url', 'www_ps_gz_url', 'www_amazon_url',
- 'www_excerpt_url' ]
+ 'www_txt_url', 'www_ps_gz_url', 'www_amazon_url',
+ 'www_excerpt_url', 'www_cache_section' ]
def url_untranslate(s):
"""Change a BibTeX key into a string suitable for use in a URL."""
- #s = s.replace(" ", "_")
- #s = s.replace(',', "_")
s = re.sub(r'([%<>, _])',
lambda m: "_%02x"%ord(m.group(1)),
s)
@@ -42,6 +40,19 @@ class ParseError(Exception):
"""Raised on invalid BibTeX"""
pass
+
+def smartJoin(*lst):
+    """Equivalent to os.path.join, but handle "." and ".." entries a bit better.
+ """
+ lst = [ item for item in lst if item != "." ]
+ idx = 0
+ while idx < len(lst):
+ if idx > 0 and lst[idx] == "..":
+ del lst[idx]
+ else:
+ idx += 1
+ return os.path.join(*lst)
+
class BibTeX:
"""A parsed BibTeX file"""
def __init__(self):
@@ -485,6 +496,13 @@ class BibTeXEntry:
url = unTeXescapeURL(url)
availability.append('<a href="%s">%s</a>' %(url,"excerpt"))
+ cache_section = self.get('www_cache_section', ".")
+ if cache_section not in config.CACHE_SECTIONS:
+ if cache_section != ".":
+ print >>sys.stderr, "Unrecognized cache section %s"%(
+ cache_section)
+ cache_section="."
+
for key, name, ext in (('www_abstract_url', 'abstract','abstract'),
('www_html_url', 'HTML', 'html'),
('www_pdf_url', 'PDF', 'pdf'),
@@ -493,10 +511,11 @@ class BibTeXEntry:
('www_ps_gz_url', 'gzipped&nbsp;PS','ps.gz')
):
if cached:
- url = os.path.join(".", config.CACHE_DIR,
- "%s.%s"%(self.key,ext))
- fname = os.path.join(config.OUTPUT_DIR, config.CACHE_DIR,
- "%s.%s"%(self.key,ext))
+ url = smartJoin(".", config.CACHE_DIR,cache_section,
+ "%s.%s"%(self.key,ext))
+ fname = smartJoin(config.OUTPUT_DIR, config.CACHE_DIR,
+ cache_section,
+ "%s.%s"%(self.key,ext))
if not os.path.exists(fname): continue
else:
url = self.get(key)
@@ -607,8 +626,8 @@ def author_url(author):
return None
def txtize(s):
- """Turn a TeX string into decent plaintext."""
- s = RE_LONE_I.sub(lambda m: "%s" % m.group(1), s)
+    """Turn a TeX string into decent plaintext."""
+ s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
s = RE_LIGATURE.sub(lambda m: "%s%s"%m.groups(), s)
s = RE_TEX_CMD.sub("", s)
@@ -807,7 +826,6 @@ def parseAuthor(s):
"""Take an author string and return a list of ParsedAuthor."""
items = []
- #print "A", `s`
s = s.strip()
while s:
s = s.strip()
@@ -827,8 +845,6 @@ def parseAuthor(s):
items.append(',')
s = s[i+1:]
- #print "B", items
-
authors = [[]]
for item in items:
if item == 'and':
@@ -836,13 +852,9 @@ def parseAuthor(s):
else:
authors[-1].append(item)
- #print "C", authors
-
parsedAuthors = []
# Split into first, von, last, jr
for author in authors:
- #print author
-
commas = 0
fvl = []
vl = []
@@ -859,16 +871,18 @@ def parseAuthor(s):
cur = f
else:
j.extend(f)
- f = []
+ cur = f = []
+ commas += 1
else:
cur.append(item)
+
if commas == 0:
split_von(f,v,l,fvl)
else:
- split_von(None,v,l,vl)
+ f_tmp = []
+ split_von(f_tmp,v,l,vl)
parsedAuthors.append(ParsedAuthor(f,v,l,j))
- #print " ====> ", parsedAuthors[-1]
return parsedAuthors
@@ -901,6 +915,7 @@ def split_von(f,v,l,x):
l.append(f[-1])
del f[-1]
+
class Parser:
"""Parser class: reads BibTeX from a file and returns a BibTeX object."""
def __init__(self, fileiter, initial_strings, result=None):
diff --git a/Makefile b/Makefile
index b72ab63..e89a9e8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,15 @@
PYTHON=python2
-VERSION=0.1
+VERSION=0.2cvs
all:
- $(PYTHON) writeHTML.py
+ $(PYTHON) writeHTML.py anonbib.cfg
clean:
rm -f *~ */*~ *.pyc *.pyo
update:
- $(PYTHON) updateCache.py
+ $(PYTHON) updateCache.py anonbib.cfg
veryclean: clean
rm -f author.html date.html topic.html bibtex.html tmp.bib
diff --git a/TODO b/TODO
index fe39c7a..6ff9c77 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,8 @@
- More general tasks
. Know about @book
+ . Write unit tests for everything
+ . Make name parsing vaguely sane
- Maybe uncrossref in tmp.bib
- Maybe pull important papers to the start of their sections?
. Clean \{}~ when going from note to url; add \{}~ when making
@@ -11,7 +13,7 @@
- When sorting by date, entries with unknown months go into a magic
"month zero" before January. Is this right?
- Strip unused features.
- - Take a configuration file on the command line instead of just
+ o Take a configuration file on the command line instead of just
importing config.py.
- Cache tasks
diff --git a/_template_bibtex.html b/_template_bibtex.html
index d5bad7e..ee4c8f3 100644
--- a/_template_bibtex.html
+++ b/_template_bibtex.html
@@ -8,7 +8,7 @@
<!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME!
*** EDIT THE .bib FILE or _template_.html INSTEAD!
-
+
Generated by `%(command_line)s'
(c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->
diff --git a/config.py b/config.py
index 0fea53c..68fbbf7 100644
--- a/config.py
+++ b/config.py
@@ -2,92 +2,38 @@
import re
-# Our input filename.
-MASTER_BIB = "./anonbib.bib"
+_KEYS = [ "ALPHABETIZE_AUTHOR_AS","AUTHOR_URLS","CACHE_DIR","CACHE_SECTIONS",
+ "COLLAPSE_AUTHORS",
+ "DOWNLOAD_CONNECT_TIMEOUT","INITIAL_STRINGS",
+ "MASTER_BIB", "NO_COLLAPSE_AUTHORS", "OMIT_ENTRIES",
+ "OUTPUT_DIR", "TEMPLATE_FILE", "BIBTEX_TEMPLATE_FILE" ]
-# Where do we put generated HTML?
-OUTPUT_DIR = "."
+for _k in _KEYS:
+ globals()[_k]=None
-# Where do we put cached papers (relative to OUTPUT_DIR)
-CACHE_DIR = "cache"
+def load(cfgFile):
+ mod = {}
+ execfile(cfgFile, mod)
+ for _k in _KEYS:
+ try:
+ globals()[_k]=mod[_k]
+ except KeyError:
+ raise KeyError("Configuration option %s is missing"%_k)
-# Timeout when downloading from a server while caching, in seconds.
-DOWNLOAD_CONNECT_TIMEOUT = 15
+ INITIAL_STRINGS.update(_EXTRA_INITIAL_STRINGS)
+ AUTHOR_RE_LIST[:] = [
+ (re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items()
+ ]
-# Map from author name regex to author homepage.
-AUTHOR_URLS = {
- 'Ross.*Anderson' : 'http://www.cl.cam.ac.uk/users/rja14/',
- 'Alessandro.*Acquisti' : 'http://www.sims.berkeley.edu/~acquisti/',
- 'Adam.*Back' : 'http://www.cypherspace.org/~adam/',
- 'Berthold' : 'http://page.inf.fu-berlin.de/~berthold/',
- 'Miguel.*Castro' : 'http://research.microsoft.com/users/mcastro/',
- 'Chaum' : 'http://www.chaum.com/',
- 'J.*Claessens' : 'http://www.esat.kuleuven.ac.be/~joclaess/',
- 'R.*Clayton' : 'http://www.cl.cam.ac.uk/~rnc1/',
- 'Danezis' : 'http://www.cl.cam.ac.uk/~gd216/',
- 'Claudia.*az' : 'http://www.esat.kuleuven.ac.be/~cdiaz/',
- 'Dingledine' : 'http://www.freehaven.net/~arma/cv.html',
- 'Desmedt' : 'http://www.cs.fsu.edu/~desmedt/',
- 'Douceur' : 'http://research.microsoft.com/~johndo/',
- 'Michael.*Freedman' : 'http://www.scs.cs.nyu.edu/~mfreed/',
- 'Ian.*Goldberg' : 'http://www.cs.berkeley.edu/~iang/',
- 'Christian.*Grothoff' : 'http://www.ovmj.org/~grothoff/',
- 'D.*Hopwood' : 'http://www.users.zetnet.co.uk/hopwood/',
- 'Jakobsson' : 'http://www.rsasecurity.com/rsalabs/staff/bios/mjakobsson/',
- 'Juels' : 'http://www.rsasecurity.com/rsalabs/staff/bios/ajuels/',
- 'K.*Kurosawa' : 'http://kuro.cis.ibaraki.ac.jp/~kurosawa/',
- 'H.*Langos' : 'http://www.wh9.tu-dresden.de/~heinrich/',
- 'B.*Liskov' : 'http://www.pmg.lcs.mit.edu/barbara_liskov.html',
- 'Mathewson' : 'http://www.wangafu.net/~nickm/',
- 'Mazi&egrave;res' : 'http://www.scs.cs.nyu.edu/~dm/',
- 'B.*M&ouml;ller' : ('http://www.informatik.tu-darmstadt.de/TI/'
- 'Mitarbeiter/moeller.html'),
- 'U.*M&ouml;ller' : 'http://www.ulfm.de/',
- 'D.*Molnar' : 'http://hcs.harvard.edu/~dmolnar/papers.html',
- 'R.*Morris' : 'http://www.pdos.lcs.mit.edu/~rtm/',
- 'A.*Pfitzmann' : 'http://dud.inf.tu-dresden.de/~pfitza/',
- 'B.*Pfitzmann' : 'http://www.zurich.ibm.com/~bpf/',
- 'B.*Preneel' : 'http://www.esat.kuleuven.ac.be/~preneel/',
- 'Daniel.*Simon' : 'http://research.microsoft.com/crypto/dansimon/me.htm',
- 'Rackoff' : 'http://www.cs.toronto.edu/DCS/People/Faculty/rackoff.html',
- 'Jean F' : 'http://www.geocities.com/j_f_raymond/',
- 'M.*Rennhard' : 'http://www.tik.ee.ethz.ch/~rennhard/',
- 'M.*Reiter' : 'http://www.ece.cmu.edu/~reiter/',
- 'Rivest' : 'http://theory.lcs.mit.edu/~rivest/',
- 'Avi.*Rubin' : 'http://avirubin.com/',
- 'Serjantov' : 'http://www.cl.cam.ac.uk/users/aas23/',
- 'S.*Seys' : 'http://www.esat.kuleuven.ac.be/~sseys/',
- 'Shoup' : 'http://www.shoup.net/',
- 'Syverson' : 'http://www.syverson.org/',
- 'Tsudik' : 'http://www.ics.uci.edu/~gts/c.html',
- 'M.*Waidner' : 'http://www.zurich.ibm.com/~wmi/',
- 'David.*Wagner' : 'http://www.cs.berkeley.edu/~daw/',
- 'M.*Waldman' : 'http://cs1.cs.nyu.edu/~waldman/',
- 'B.*Waters' : 'http://www.cs.princeton.edu/~bwaters/',
- 'M.*Wright' : 'http://www.cs.umass.edu/~mwright/',
- }
+ NO_COLLAPSE_AUTHORS_RE_LIST[:] = [
+ re.compile(pat, re.I) for pat in NO_COLLAPSE_AUTHORS
+ ]
-# List of paterns for author names _not_ to do an initial-tolerant
-# match on when building section list. E.g., if "J\\. Smith" is in
-# this list, he won't be folded into "John Smith".
-NO_COLLAPSE_AUTHORS = [
+ ALPHABETIZE_AUTHOR_AS_RE_LIST[:] = [
+ (re.compile(k, re.I), v,) for k,v in ALPHABETIZE_AUTHOR_AS.items()
+ ]
-]
-
-# Map from LaTeX-style name of author to collapse to canonical name.
-COLLAPSE_AUTHORS = {
- "Nicholas Mathewson": "Nick Mathewson",
- }
-
-# Map from author pattern to collation key.
-# This keeps 'Zero Knowledge Systems' from getting alphabetized as "Systems,
-# Zero Knowledge."
-ALPHABETIZE_AUTHOR_AS = {
- "Zero.*Knowledge.*Systems": "Zero Knowledge Systems",
- }
-
-# Map of strings to initialzie BibTeX parsing with.
-INITIAL_STRINGS = {
+_EXTRA_INITIAL_STRINGS = {
# MONTHS
'jan' : 'January', 'feb' : 'February',
'mar' : 'March', 'apr' : 'April',
@@ -95,30 +41,10 @@ INITIAL_STRINGS = {
'jul' : 'July', 'aug' : 'August',
'sep' : 'September', 'oct' : 'October',
'nov' : 'November', 'dec' : 'December',
-
- # SECTIONS
- 'sec_mix' : "Mix Networks: Design",
- 'sec_mixattacks' : "Mix Networks: Attacks",
- 'sec_stream' : "Stream-based anonymity",
- 'sec_traffic' : "Traffic analysis",
- 'sec_pub' : "Anonymous publication",
- 'sec_nym' : "Pseudonymity"
}
-# Don't put in any entries of this type.
-OMIT_ENTRIES = ("proceedings", "journal")
-
-
-### Don't edit below this line
-
-AUTHOR_RE_LIST = [
- (re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items()
- ]
+AUTHOR_RE_LIST = []
-NO_COLLAPSE_AUTHORS_RE_LIST = [
- re.compile(pat, re.I) for pat in NO_COLLAPSE_AUTHORS
- ]
+NO_COLLAPSE_AUTHORS_RE_LIST = []
-ALPHABETIZE_AUTHOR_AS_RE_LIST = [
- (re.compile(k, re.I), v,) for k,v in ALPHABETIZE_AUTHOR_AS.items()
- ]
+ALPHABETIZE_AUTHOR_AS_RE_LIST = []
diff --git a/metaphone.py b/metaphone.py
index 9c261d6..e86d4fc 100644
--- a/metaphone.py
+++ b/metaphone.py
@@ -188,7 +188,6 @@ def metaphone(s):
def demo(a):
print a, "=>", metaphone(a)
-print __name__
if __name__ == '__main__':
demo("Nick. Mathewson")
diff --git a/updateCache.py b/updateCache.py
index b746f8c..cb2417a 100644
--- a/updateCache.py
+++ b/updateCache.py
@@ -30,13 +30,14 @@ def tryUnlink(fn):
except OSError:
pass
-def getCacheFname(key, ftype):
- return os.path.join(config.OUTPUT_DIR,config.CACHE_DIR,
- "%s.%s"%(key,ftype))
+def getCacheFname(key, ftype, section):
+ return BibTeX.smartJoin(config.OUTPUT_DIR,config.CACHE_DIR,
+ section,
+ "%s.%s"%(key,ftype))
-
-def downloadFile(key, ftype, url, timeout=config.DOWNLOAD_CONNECT_TIMEOUT):
- fname = getCacheFname(key, ftype)
+def downloadFile(key, ftype, section, url,
+ timeout=config.DOWNLOAD_CONNECT_TIMEOUT):
+ fname = getCacheFname(key, ftype, section)
fnameTmp = fname+".tmp"
fnameURL = fname+".url"
tryUnlink(fnameTmp)
@@ -89,7 +90,7 @@ def getURLs(entry):
return r
def getCachedURL(key, ftype):
- fname = getCacheFname(key, ftype)
+ fname = getCacheFname(key, ftype, section)
urlFname = fname+".url"
if not os.path.exists(fname) or not os.path.exists(urlFname):
return None
@@ -106,10 +107,11 @@ def downloadAll(bibtex, missingOnly=0):
for e in bibtex.entries:
urls = getURLs(e)
key = e.key
+ section = e.get("www_cache_section", ".")
for ftype, url in urls.items():
- fname = getCacheFname(key, ftype)
+ fname = getCacheFname(key, ftype, section)
if missingOnly:
- cachedURL = getCachedURL(key, ftype)
+ cachedURL = getCachedURL(key, ftype, section)
if cachedURL == url:
print >>sys.stderr,"Skipping",url
continue
@@ -118,7 +120,7 @@ def downloadAll(bibtex, missingOnly=0):
else:
print >>sys.stderr,"No record for %s.%s"%(key,ftype)
try:
- downloadFile(key, ftype, url)
+ downloadFile(key, ftype, section, url)
print "Downloaded",url
except UIError, e:
print >>sys.stderr, str(e)
@@ -129,5 +131,12 @@ def downloadAll(bibtex, missingOnly=0):
errors.append((key,ftype,url,msg))
return errors
+if len(sys.argv) == 2:
+ print "Loading from %s"%sys.argv[1]
+else:
+ print >>sys.stderr, "Expected a single configuration file as an argument"
+ sys.exit(1)
+config.load(sys.argv[1])
+
bib = BibTeX.parseFile(config.MASTER_BIB)
downloadAll(bib,missingOnly=1)
diff --git a/writeHTML.py b/writeHTML.py
index 9f6a0fa..d25a0a0 100644
--- a/writeHTML.py
+++ b/writeHTML.py
@@ -14,7 +14,7 @@ import BibTeX
import config
def getTemplate(name):
- f = open(name+".html")
+ f = open(name)
template = f.read()
f.close()
template_s, template_e = template.split("%(entries)s")
@@ -70,11 +70,18 @@ def writeHTML(f, sections, sectionType, fieldName, choices, section_urls={}):
'sections' : secStr,
}
- header, footer = getTemplate("_template_")
+ header, footer = getTemplate(config.TEMPLATE_FILE)
print >>f, header%fields
writeBody(f, sections, section_urls)
print >>f, footer%fields
+if len(sys.argv) == 2:
+ print "Loading from %s"%sys.argv[1]
+else:
+ print >>sys.stderr, "Expected a single configuration file as an argument"
+ sys.exit(1)
+config.load(sys.argv[1])
+
bib = BibTeX.parseFile(config.MASTER_BIB)
##### Sorted views:
@@ -145,7 +152,7 @@ entries = bib.entries[:]
entries = [ (ent.key, ent) for ent in entries ]
entries.sort()
entries = [ ent[1] for ent in entries ]
-header,footer = getTemplate("_template_bibtex")
+header,footer = getTemplate(config.BIBTEX_TEMPLATE_FILE)
f = open(os.path.join(config.OUTPUT_DIR,"bibtex.html"), 'w')
print >>f, header % { 'command_line' : "" }
for ent in entries: