aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2003-09-09 00:25:29 +0000
committerNick Mathewson <nickm@torproject.org>2003-09-09 00:25:29 +0000
commitc666e2e0794ba284b5a0c0f17d54c4a3233ea187 (patch)
tree498512e8706921fa27af73e5f28d665859b50379
parentd3acfd8b215920491745f462d08fcd5e963a52ca (diff)
downloadanonbib-c666e2e0794ba284b5a0c0f17d54c4a3233ea187.tar.gz
Add a simple caching implementation
svn:r67
-rw-r--r--.cvsignore1
-rw-r--r--BibTeX.py43
-rw-r--r--config.py6
-rw-r--r--updateCache.py132
-rw-r--r--writeHTML.py1
5 files changed, 168 insertions, 15 deletions
diff --git a/.cvsignore b/.cvsignore
index 432c089..6ca418d 100644
--- a/.cvsignore
+++ b/.cvsignore
@@ -4,3 +4,4 @@ date.html
topic.html
author.html
bibtex.html
+cache
\ No newline at end of file
diff --git a/BibTeX.py b/BibTeX.py
index ac5ba5d..b9d36ec 100644
--- a/BibTeX.py
+++ b/BibTeX.py
@@ -3,6 +3,7 @@
import cStringIO
import re
import sys
+import os
import config
@@ -414,21 +415,33 @@ class BibTeXEntry:
res.append("<span class='title'><a name='%s'>%s</a></span>"%(
url_untranslate(self.key),htmlize(self['title'])))
- availability = []
- for key, name in (('www_abstract_url', 'abstract'),
- ('www_html_url', 'HTML'),
- ('www_pdf_url', 'PDF'),
- ('www_ps_url', 'PS'),
- ('www_txt_url', 'TXT'),
- ('www_ps_gz_url', 'gzipped&nbsp;PS')):
- url = self.get(key)
- if not url: continue
- url = unTeXescapeURL(url)
- availability.append('<a href="%s">%s</a>' %(url,name))
- if availability:
- res.append(" <span class='availability'>(")
- res.append(",&nbsp;".join(availability))
- res.append(")</span>")
+ for cached in 0,1:
+ availability = []
+ for key, name, ext in (('www_abstract_url', 'abstract','abstract'),
+ ('www_html_url', 'HTML', 'html'),
+ ('www_pdf_url', 'PDF', 'pdf'),
+ ('www_ps_url', 'PS', 'ps'),
+ ('www_txt_url', 'TXT', 'txt'),
+ ('www_ps_gz_url', 'gzipped&nbsp;PS','ps.gz')
+ ):
+ if cached:
+ url = os.path.join(".", config.CACHE_DIR,
+ "%s.%s"%(self.key,ext))
+ fname = os.path.join(config.OUTPUT_DIR, config.CACHE_DIR,
+ "%s.%s"%(self.key,ext))
+ if not os.path.exists(fname): continue
+ else:
+ url = self.get(key)
+ if not url: continue
+ url = unTeXescapeURL(url)
+ availability.append('<a href="%s">%s</a>' %(url,name))
+
+ if availability:
+ res.append(" <span class='availability'>(")
+ if cached: res.append("Cached:&nbsp;")
+ res.append(",&nbsp;".join(availability))
+ res.append(")</span>")
+
res.append("<br /><span class='author'>by ")
#res.append("\n<!-- %r -->\n" % self.parsedAuthor)
diff --git a/config.py b/config.py
index 76e486e..a777659 100644
--- a/config.py
+++ b/config.py
@@ -5,6 +5,12 @@ MASTER_BIB = "./anonbib.bib"
OUTPUT_DIR = "."
+# relative to OUTPUT_DIR.
+CACHE_DIR = "cache"
+
+# Timeout (in seconds) for connecting to a server while caching.
+DOWNLOAD_CONNECT_TIMEOUT = 15
+
AUTHOR_URLS = {
'Ross.*Anderson' : 'http://www.cl.cam.ac.uk/users/rja14/',
'Alessandro.*Acquisti' : 'http://www.sims.berkeley.edu/~acquisti/',
diff --git a/updateCache.py b/updateCache.py
new file mode 100644
index 0000000..5edda3d
--- /dev/null
+++ b/updateCache.py
@@ -0,0 +1,132 @@
+#!/usr/bin/python2
+
+"""Download files in bibliography into a local cache, so that the generated
+HTML pages can link to the cached copies.
+"""
+
+import os
+import sys
+import signal
+import time
+
+import BibTeX
+import config
+import urllib2
+import getopt
+import socket
+import errno
+
+# Mask out group/other write permission on everything this script creates.
+os.umask(022)
+
+# File-type suffixes that can be cached.  getURLs() looks up the bibliography
+# field "www_<type>_url" for each one, with "." replaced by "_".
+FILE_TYPES = [ "txt", "html", "pdf", "ps", "ps.gz", "abstract" ]
+# File types that downloadFile() writes in binary mode ('wb').
+BIN_FILE_TYPES = [ 'pdf', 'ps.gz' ]
+
+class UIError(Exception):
+    """Download failure carrying a message meant to be shown to the user.
+
+    downloadFile() raises it for connection problems; downloadAll() prints
+    the message, records it, and carries on with the next file.
+    """
+    pass
+
+def tryUnlink(fn):
+ try:
+ os.unlink(fn)
+ except OSError:
+ pass
+
+def getCacheFname(key, ftype):
+ return os.path.join(config.OUTPUT_DIR,config.CACHE_DIR,
+ "%s.%s"%(key,ftype))
+
+
+def downloadFile(key, ftype, url, timeout=config.DOWNLOAD_CONNECT_TIMEOUT):
+    """Fetch `url` and store it as the cache file for (`key`, `ftype`).
+
+    The data is written to "<cache file>.tmp" and renamed into place once
+    the whole body has been read.  A "<cache file>.url" record holding the
+    download time and the URL is written just before the rename.
+
+    `timeout` is handed to signal.alarm(), so it is in seconds.  The alarm
+    is cancelled as soon as urllib2.urlopen() returns or raises, so it
+    bounds only the urlopen() call and not the reads that follow.
+
+    Raises UIError if urlopen() fails with IOError or socket.error.  Errors
+    raised while reading or writing are not caught here.
+    """
+    fname = getCacheFname(key, ftype)
+    fnameTmp = fname+".tmp"
+    fnameURL = fname+".url"
+    # Remove any existing temporary file before starting.
+    tryUnlink(fnameTmp)
+
+    # No-op handler.  NOTE(review): presumably installed so that SIGALRM
+    # interrupts the blocking call (surfacing as EINTR below) instead of
+    # terminating the process -- confirm.  The previous SIGALRM handler is
+    # not restored afterwards.
+    def sigalrmHandler(sig,_):
+        pass
+    signal.signal(signal.SIGALRM, sigalrmHandler)
+    signal.alarm(timeout)
+    # Nested try blocks: the inner one maps connection errors to UIError,
+    # the outer one always cancels the pending alarm.
+    try:
+        try:
+            infile = urllib2.urlopen(url)
+        except IOError, e:
+            raise UIError("Cannot connect to url %s: %s"%(url,e))
+        except socket.error, e:
+            # An interrupted call (EINTR) is reported as a timeout.
+            if getattr(e,"errno",-1) == errno.EINTR:
+                raise UIError("Connection timed out to url %s"%url)
+            else:
+                raise UIError("Error connecting to %s: %s"%(url, e))
+    finally:
+        signal.alarm(0)
+
+    # Only the types listed in BIN_FILE_TYPES are opened in binary mode.
+    mode = 'w'
+    if ftype in BIN_FILE_TYPES:
+        mode = 'wb'
+    outfile = open(fnameTmp, mode)
+    try:
+        # Copy in chunks of 1<<16 (65536) bytes until read() returns an
+        # empty string.
+        while 1:
+            s = infile.read(1<<16)
+            if not s: break
+            outfile.write(s)
+    finally:
+        infile.close()
+        outfile.close()
+
+    # Record file: first line is the local download time, second the URL.
+    urlfile = open(fnameURL, 'w')
+    print >>urlfile, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+    # Keep the URL on a single line of the record file.
+    if "\n" in url: url = url.replace("\n", " ")
+    print >>urlfile, url
+    urlfile.close()
+
+    # Move the finished download to its final cache name.
+    os.rename(fnameTmp, fname)
+
+def getURLs(entry):
+ r = {}
+ for ftype in FILE_TYPES:
+ ftype2 = ftype.replace(".", "_")
+ url = entry.get("www_%s_url"%ftype2)
+ if url:
+ r[ftype] = url.strip().replace("\n", " ")
+ return r
+
+def getCachedURL(key, ftype):
+ fname = getCacheFname(key, ftype)
+ urlFname = fname+".url"
+ if not os.path.exists(fname) or not os.path.exists(urlFname):
+ return None
+ f = open(urlFname, 'r')
+ lines = f.readlines()
+ f.close()
+ if len(lines) != 2:
+ print >>sys.stderr, "ERROR: unexpected number of lines in", urlFname
+ return lines[1].strip()
+
+def downloadAll(bibtex, missingOnly=0):
+ """returns list of tuples of key, ftype, url, error"""
+ errors = []
+ for e in bibtex.entries:
+ urls = getURLs(e)
+ key = e.key
+ for ftype, url in urls.items():
+ fname = getCacheFname(key, ftype)
+ if missingOnly:
+ cachedURL = getCachedURL(key, ftype)
+ if cachedURL == url:
+ print >>sys.stderr,"Skipping",url
+ continue
+ elif cachedURL is not None:
+ print >>sys.stderr,"URL for %s.%s has changed"%(key,ftype)
+ else:
+ print >>sys.stderr,"No record for %s.%s"%(key,ftype)
+ try:
+ downloadFile(key, ftype, url)
+ print "Downloaded",url
+ except UIError, e:
+ print >>sys,stderr, str(e)
+ errors.append((key,ftype,url,str(e)))
+ except (IOError, socket.error), e:
+ msg = "Error downloading %s: %s"%(url,str(e))
+ print >>sys.stderr, msg
+ errors.append((key,ftype,url,msg))
+ return errors
+
+# Module-level entry point: parse the master bibliography, then fetch every
+# file whose cached URL record is missing or differs from the listed URL.
+bib = BibTeX.parseFile(config.MASTER_BIB)
+downloadAll(bib,missingOnly=1)
diff --git a/writeHTML.py b/writeHTML.py
index 85b1aa3..d7b1ffa 100644
--- a/writeHTML.py
+++ b/writeHTML.py
@@ -5,6 +5,7 @@ import re
import os
assert sys.version_info[:3] >= (2,2,0)
+os.umask(022)
import BibTeX
import config