diff options
| author | Nick Mathewson <nickm@torproject.org> | 2003-09-09 00:25:29 +0000 |
|---|---|---|
| committer | Nick Mathewson <nickm@torproject.org> | 2003-09-09 00:25:29 +0000 |
| commit | c666e2e0794ba284b5a0c0f17d54c4a3233ea187 (patch) | |
| tree | 498512e8706921fa27af73e5f28d665859b50379 | |
| parent | d3acfd8b215920491745f462d08fcd5e963a52ca (diff) | |
| download | anonbib-c666e2e0794ba284b5a0c0f17d54c4a3233ea187.tar.gz | |
Add a simple caching implementation
svn:r67
| -rw-r--r-- | .cvsignore | 1 | ||||
| -rw-r--r-- | BibTeX.py | 43 | ||||
| -rw-r--r-- | config.py | 6 | ||||
| -rw-r--r-- | updateCache.py | 132 | ||||
| -rw-r--r-- | writeHTML.py | 1 |
5 files changed, 168 insertions, 15 deletions
@@ -4,3 +4,4 @@ date.html topic.html author.html bibtex.html +cache
\ No newline at end of file @@ -3,6 +3,7 @@ import cStringIO import re import sys +import os import config @@ -414,21 +415,33 @@ class BibTeXEntry: res.append("<span class='title'><a name='%s'>%s</a></span>"%( url_untranslate(self.key),htmlize(self['title']))) - availability = [] - for key, name in (('www_abstract_url', 'abstract'), - ('www_html_url', 'HTML'), - ('www_pdf_url', 'PDF'), - ('www_ps_url', 'PS'), - ('www_txt_url', 'TXT'), - ('www_ps_gz_url', 'gzipped PS')): - url = self.get(key) - if not url: continue - url = unTeXescapeURL(url) - availability.append('<a href="%s">%s</a>' %(url,name)) - if availability: - res.append(" <span class='availability'>(") - res.append(", ".join(availability)) - res.append(")</span>") + for cached in 0,1: + availability = [] + for key, name, ext in (('www_abstract_url', 'abstract','abstract'), + ('www_html_url', 'HTML', 'html'), + ('www_pdf_url', 'PDF', 'pdf'), + ('www_ps_url', 'PS', 'ps'), + ('www_txt_url', 'TXT', 'txt'), + ('www_ps_gz_url', 'gzipped PS','ps.gz') + ): + if cached: + url = os.path.join(".", config.CACHE_DIR, + "%s.%s"%(self.key,ext)) + fname = os.path.join(config.OUTPUT_DIR, config.CACHE_DIR, + "%s.%s"%(self.key,ext)) + if not os.path.exists(fname): continue + else: + url = self.get(key) + if not url: continue + url = unTeXescapeURL(url) + availability.append('<a href="%s">%s</a>' %(url,name)) + + if availability: + res.append(" <span class='availability'>(") + if cached: res.append("Cached: ") + res.append(", ".join(availability)) + res.append(")</span>") + res.append("<br /><span class='author'>by ") #res.append("\n<!-- %r -->\n" % self.parsedAuthor) @@ -5,6 +5,12 @@ MASTER_BIB = "./anonbib.bib" OUTPUT_DIR = "." +# relative to OUTPUT_DIR. +CACHE_DIR = "cache" + +# Time to connect to a server while caching. +DOWNLOAD_CONNECT_TIMEOUT = 15 + AUTHOR_URLS = { 'Ross.*Anderson' : 'http://www.cl.cam.ac.uk/users/rja14/', 'Alessandro.*Acquisti' : 'http://www.sims.berkeley.edu/~acquisti/', diff --git a/updateCache.py b/updateCache.py new file mode 100644 index 0000000..5edda3d --- /dev/null +++ b/updateCache.py @@ -0,0 +1,132 @@ +#!/usr/bin/python2 + +"""Download files in bibliography into a local cache, in order to +""" + +import os +import sys +import signal +import time + +import BibTeX +import config +import urllib2 +import getopt +import socket +import errno + +os.umask(022) + +FILE_TYPES = [ "txt", "html", "pdf", "ps", "ps.gz", "abstract" ] +BIN_FILE_TYPES = [ 'pdf', 'ps.gz' ] + +class UIError(Exception): + pass + +def tryUnlink(fn): + try: + os.unlink(fn) + except OSError: + pass + +def getCacheFname(key, ftype): + return os.path.join(config.OUTPUT_DIR,config.CACHE_DIR, + "%s.%s"%(key,ftype)) + + +def downloadFile(key, ftype, url, timeout=config.DOWNLOAD_CONNECT_TIMEOUT): + fname = getCacheFname(key, ftype) + fnameTmp = fname+".tmp" + fnameURL = fname+".url" + tryUnlink(fnameTmp) + + def sigalrmHandler(sig,_): + pass + signal.signal(signal.SIGALRM, sigalrmHandler) + signal.alarm(timeout) + try: + try: + infile = urllib2.urlopen(url) + except IOError, e: + raise UIError("Cannot connect to url %s: %s"%(url,e)) + except socket.error, e: + if getattr(e,"errno",-1) == errno.EINTR: + raise UIError("Connection timed out to url %s"%url) + else: + raise UIError("Error connecting to %s: %s"%(url, e)) + finally: + signal.alarm(0) + + mode = 'w' + if ftype in BIN_FILE_TYPES: + mode = 'wb' + outfile = open(fnameTmp, mode) + try: + while 1: + s = infile.read(1<<16) + if not s: break + outfile.write(s) + finally: + infile.close() + outfile.close() + + urlfile = open(fnameURL, 'w') + print >>urlfile, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + if "\n" in url: url = url.replace("\n", " ") + print >>urlfile, url + urlfile.close() + + os.rename(fnameTmp, fname) + +def getURLs(entry): + r = {} + for ftype in FILE_TYPES: + ftype2 = ftype.replace(".", "_") + url = entry.get("www_%s_url"%ftype2) + if url: + r[ftype] = url.strip().replace("\n", " ") + return r + +def getCachedURL(key, ftype): + fname = getCacheFname(key, ftype) + urlFname = fname+".url" + if not os.path.exists(fname) or not os.path.exists(urlFname): + return None + f = open(urlFname, 'r') + lines = f.readlines() + f.close() + if len(lines) != 2: + print >>sys.stderr, "ERROR: unexpected number of lines in", urlFname + return lines[1].strip() + +def downloadAll(bibtex, missingOnly=0): + """returns list of tuples of key, ftype, url, error""" + errors = [] + for e in bibtex.entries: + urls = getURLs(e) + key = e.key + for ftype, url in urls.items(): + fname = getCacheFname(key, ftype) + if missingOnly: + cachedURL = getCachedURL(key, ftype) + if cachedURL == url: + print >>sys.stderr,"Skipping",url + continue + elif cachedURL is not None: + print >>sys.stderr,"URL for %s.%s has changed"%(key,ftype) + else: + print >>sys.stderr,"No record for %s.%s"%(key,ftype) + try: + downloadFile(key, ftype, url) + print "Downloaded",url + except UIError, e: + print >>sys,stderr, str(e) + errors.append((key,ftype,url,str(e))) + except (IOError, socket.error), e: + msg = "Error downloading %s: %s"%(url,str(e)) + print >>sys.stderr, msg + errors.append((key,ftype,url,msg)) + return errors + +bib = BibTeX.parseFile(config.MASTER_BIB) +downloadAll(bib,missingOnly=1) diff --git a/writeHTML.py b/writeHTML.py index 85b1aa3..d7b1ffa 100644 --- a/writeHTML.py +++ b/writeHTML.py @@ -5,6 +5,7 @@ import re import os assert sys.version_info[:3] >= (2,2,0) +os.umask(022) import BibTeX import config |
