From 1c5aca9a62b3a637d7fc9053c6ae40dcc5e2c09b Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 30 Jul 2008 18:39:02 +0000 Subject: Do not actually build a URL opener for ranks until we know we have a cache miss. Under some circumstances, building the opener hits the network. svn:r291 --- rank.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'rank.py') diff --git a/rank.py b/rank.py index 76ca181..3f7ed71 100644 --- a/rank.py +++ b/rank.py @@ -56,9 +56,6 @@ def getCite(title, cache=True, update=True, save=True): title = re.sub("[^'a-zA-Z0-9\. \-\/:]", "", title) title = re.sub("'\/", " ", title) - # Make a custom user agent (so that we are not filtered by Google)! - opener = build_opener() - opener.addheaders = [('User-agent', 'Anon.Bib.0.1')] # We rely on google scholar to return the article with this exact title gurl = "http://scholar.google.com/scholar?as_epq=%s&as_occt=title" @@ -70,6 +67,11 @@ def getCite(title, cache=True, update=True, save=True): page = file(join(cache_folder(), md5h(url)),'r').read() elif update: print "Downloading rank for %r."%title + + # Make a custom user agent (so that we are not filtered by Google)! + opener = build_opener() + opener.addheaders = [('User-agent', 'Anon.Bib.0.1')] + page = opener.open(url).read() if save: file(join(cache_folder(), md5h(url)),'w').write(page) else: -- cgit v1.2.3-70-g09d2