aboutsummaryrefslogtreecommitdiffstats
path: root/updateCache.py
diff options
context:
space:
mode:
Diffstat (limited to 'updateCache.py')
-rw-r--r--updateCache.py132
1 files changed, 132 insertions, 0 deletions
diff --git a/updateCache.py b/updateCache.py
new file mode 100644
index 0000000..5edda3d
--- /dev/null
+++ b/updateCache.py
@@ -0,0 +1,132 @@
+#!/usr/bin/python2
+
+"""Download files in bibliography into a local cache.
+"""
+
+import os
+import sys
+import signal
+import time
+
+import BibTeX
+import config
+import urllib2
+import getopt
+import socket
+import errno
+
+os.umask(022)  # mask out group/other write permission on files this script creates
+# FILE_TYPES: file types looked up as "www_<type>_url" fields (see getURLs).
+FILE_TYPES = [ "txt", "html", "pdf", "ps", "ps.gz", "abstract" ]
+BIN_FILE_TYPES = [ 'pdf', 'ps.gz' ]  # types that downloadFile writes in binary ('wb') mode
+
+class UIError(Exception):  # raised by downloadFile on connection failure; downloadAll prints its message
+ pass
+
+def tryUnlink(fn):
+ try:
+ os.unlink(fn)
+ except OSError:
+ pass
+
+def getCacheFname(key, ftype):
+ return os.path.join(config.OUTPUT_DIR,config.CACHE_DIR,
+ "%s.%s"%(key,ftype))
+
+
+def downloadFile(key, ftype, url, timeout=config.DOWNLOAD_CONNECT_TIMEOUT):
+ fname = getCacheFname(key, ftype)
+ fnameTmp = fname+".tmp"
+ fnameURL = fname+".url"
+ tryUnlink(fnameTmp)
+
+ def sigalrmHandler(sig,_):
+ pass
+ signal.signal(signal.SIGALRM, sigalrmHandler)
+ signal.alarm(timeout)
+ try:
+ try:
+ infile = urllib2.urlopen(url)
+ except IOError, e:
+ raise UIError("Cannot connect to url %s: %s"%(url,e))
+ except socket.error, e:
+ if getattr(e,"errno",-1) == errno.EINTR:
+ raise UIError("Connection timed out to url %s"%url)
+ else:
+ raise UIError("Error connecting to %s: %s"%(url, e))
+ finally:
+ signal.alarm(0)
+
+ mode = 'w'
+ if ftype in BIN_FILE_TYPES:
+ mode = 'wb'
+ outfile = open(fnameTmp, mode)
+ try:
+ while 1:
+ s = infile.read(1<<16)
+ if not s: break
+ outfile.write(s)
+ finally:
+ infile.close()
+ outfile.close()
+
+ urlfile = open(fnameURL, 'w')
+ print >>urlfile, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+ if "\n" in url: url = url.replace("\n", " ")
+ print >>urlfile, url
+ urlfile.close()
+
+ os.rename(fnameTmp, fname)
+
+def getURLs(entry):
+ r = {}
+ for ftype in FILE_TYPES:
+ ftype2 = ftype.replace(".", "_")
+ url = entry.get("www_%s_url"%ftype2)
+ if url:
+ r[ftype] = url.strip().replace("\n", " ")
+ return r
+
+def getCachedURL(key, ftype):
+ fname = getCacheFname(key, ftype)
+ urlFname = fname+".url"
+ if not os.path.exists(fname) or not os.path.exists(urlFname):
+ return None
+ f = open(urlFname, 'r')
+ lines = f.readlines()
+ f.close()
+ if len(lines) != 2:
+ print >>sys.stderr, "ERROR: unexpected number of lines in", urlFname
+ return lines[1].strip()
+
+def downloadAll(bibtex, missingOnly=0):
+ """returns list of tuples of key, ftype, url, error"""
+ errors = []
+ for e in bibtex.entries:
+ urls = getURLs(e)
+ key = e.key
+ for ftype, url in urls.items():
+ fname = getCacheFname(key, ftype)
+ if missingOnly:
+ cachedURL = getCachedURL(key, ftype)
+ if cachedURL == url:
+ print >>sys.stderr,"Skipping",url
+ continue
+ elif cachedURL is not None:
+ print >>sys.stderr,"URL for %s.%s has changed"%(key,ftype)
+ else:
+ print >>sys.stderr,"No record for %s.%s"%(key,ftype)
+ try:
+ downloadFile(key, ftype, url)
+ print "Downloaded",url
+ except UIError, e:
+ print >>sys,stderr, str(e)
+ errors.append((key,ftype,url,str(e)))
+ except (IOError, socket.error), e:
+ msg = "Error downloading %s: %s"%(url,str(e))
+ print >>sys.stderr, msg
+ errors.append((key,ftype,url,msg))
+ return errors
+
+bib = BibTeX.parseFile(config.MASTER_BIB)  # parse the bibliography file named by config.MASTER_BIB
+downloadAll(bib,missingOnly=1)  # fetch only files with no matching cache record; the returned error list is discarded