import rank
import sys
import re
import config
import os

from utils import htmlize, url_untranslate, unTeXescapeURL, smartJoin, \
     _split, ALLCHARS, PRINTINGCHARS
from author import parseAuthor

# Fields that we only care about for making web pages (BibTeX doesn't
# recognize them.)
WWW_FIELDS = ['www_section', 'www_important', 'www_remarks',
              'www_abstract_url', 'www_html_url', 'www_pdf_url',
              'www_ps_url', 'www_txt_url', 'www_ps_gz_url',
              'www_amazon_url', 'www_excerpt_url', 'www_publisher_url',
              'www_cache_section', 'www_tags']

PROCEEDINGS_RE = re.compile(
    r'((?:proceedings|workshop record) of(?: the)? )(.*)', re.I)

# List of fields that appear when we display the entries as BibTeX.
DISPLAYED_FIELDS = ['title', 'author', 'journal', 'booktitle', 'school',
                    'institution', 'organization', 'volume', 'number',
                    'year', 'month', 'address', 'location', 'chapter',
                    'edition', 'pages', 'editor', 'howpublished', 'key',
                    'publisher', 'type', 'note', 'series']


class BibTeXEntry:
    """A single BibTeX entry."""
    def __init__(self, type, key, fields):
        self.type = type      # What kind of entry is it? (@book,@injournal,etc)
        self.key = key        # What key does it have?
        self.fields = fields  # Map from key to value.
        self.entryLine = 0    # Defined on this line number

    def get(self, k, v=None):
        return self.fields.get(k, v)

    def __contains__(self, k):
        return k in self.fields

    def __getitem__(self, k):
        return self.fields[k]

    def __delitem__(self, k):
        del self.fields[k]

    def __setitem__(self, k, v):
        self.fields[k] = v

    def __str__(self):
        return self.format(70, 1)

    def __iter__(self):
        return iter(self.fields.keys())

    def getURL(self):
        """Return the best URL to use for this paper, or None."""
        best = None
        for field in ['www_pdf_url', 'www_ps_gz_url', 'www_ps_url',
                      'www_html_url', 'www_txt_url', ]:
            u = self.get(field)
            if u:
                if not best:
                    best = u
                elif (best.startswith("http://citeseer.nj.nec.com/") and
                      not u.startswith("http://citeseer.nj.nec.com/")):
                    best = u
        return best

    def format(self, width=70, indent=8, v=0, invStrings={}):
        """Format this entry as BibTeX."""
        d = ["@%s{%s,\n" % (self.type, self.key)]
        if v:
            df = DISPLAYED_FIELDS[:]
            for k in self:
                if k not in df:
                    df.append(k)
        else:
            df = DISPLAYED_FIELDS
        for f in df:
            if f not in self:
                continue
            v = self[f]
            if v.startswith("<span class='bad'>"):
                d.append("%%%%% ERROR: Missing field\n")
                d.append("%% %s = {?????},\n" % f)
                continue
            np = v.translate(ALLCHARS, PRINTINGCHARS)
            if np:
                d.append("%%%%% " +
                         ("ERROR: Non-ASCII characters: '%r'\n" % np))
            d.append("  ")
            v = v.replace("&", "&amp;")
            if v in invStrings:
                s = "%s = %s,\n" % (f, invStrings[v])
            else:
                s = "%s = {%s},\n" % (f, v)
            d.append(_split(s, width, indent))
        d.append("}\n")
        return "".join(d)

    def resolve(self):
        """Handle post-processing for this entry"""
        a = self.get('author')
        if a:
            self.parsedAuthor = parseAuthor(a)
            #print a
            #print "  => ", repr(self.parsedAuthor)
        else:
            self.parsedAuthor = None

    def isImportant(self):
        """Return 1 iff this entry is marked as important"""
        imp = self.get("www_important")
        if imp and imp.strip().lower() not in ("no", "false", "0"):
            return 1
        return 0

    def check(self):
        """Print any errors for this entry, and return true if there
           were none."""
        errs = self._check()
        for e in errs:
            print e
        return not errs

    def _check(self):
        errs = []
        if self.type == 'inproceedings':
            fields = 'booktitle', 'year'
        elif self.type == 'incollection':
            fields = 'booktitle', 'year'
        elif self.type == 'proceedings':
            fields = 'booktitle', 'editor'
        elif self.type == 'article':
            fields = 'journal', 'year'
        elif self.type == 'techreport':
            fields = 'institution',
        elif self.type == 'misc':
            fields = 'howpublished',
        elif self.type in ('mastersthesis', 'phdthesis'):
            fields = ()
        else:
            fields = ()
            errs.append("ERROR: odd type %s" % self.type)
        if self.type != 'proceedings':
            fields += 'title', 'author', 'www_section', 'year'

        for field in fields:
            if self.get(field) is None or \
               self.get(field).startswith("<span class='bad'>"):
                errs.append("ERROR: %s has no %s field" % (self.key, field))
                self[field] = "<span class='bad'>%s:??</span>" % field

        if self.type == 'inproceedings':
            if self.get("booktitle"):
                if not self['booktitle'].startswith("Proceedings of") and \
                   not self['booktitle'].startswith("{Proceedings of"):
                    errs.append("ERROR: %s's booktitle (%r) doesn't start with"
                                " 'Proceedings of'" % (self.key,
                                                       self['booktitle']))

        if "pages" in self and not re.search(r'\d+--\d+', self['pages']):
            errs.append("ERROR: Misformed pages in %s" % self.key)

        if self.type == 'proceedings':
            if self.get('title'):
                errs.append("ERROR: %s is a proceedings: it should have a"
                            " booktitle, not a title." % self.key)

        for field, value in self.fields.items():
            if value.translate(ALLCHARS, PRINTINGCHARS):
                errs.append("ERROR: %s.%s has non-ASCII characters" % (
                    self.key, field))
            if field.startswith("www_") and field not in WWW_FIELDS:
                errs.append("ERROR: unknown www field %s" % field)
            if value.strip()[-1:] == '.' and \
               field not in ("notes", "www_remarks", "author"):
                errs.append("ERROR: %s.%s has an extraneous period" % (
                    self.key, field))
        return errs

    def biblio_to_html(self):
        """Return the HTML for the citation portion of this entry."""
        if self.type in ('inproceedings', 'incollection'):
            booktitle = self['booktitle']
            bookurl = self.get('bookurl')
            if bookurl:
                m = PROCEEDINGS_RE.match(booktitle)
                if m:
                    res = ["In the ", m.group(1),
                           '<a href="%s">' % bookurl, m.group(2), "</a>"]
                else:
                    res = ['In the <a href="%s">%s</a>' % (bookurl, booktitle)]
            else:
                res = ["In the ", booktitle]

            if self.get("edition"):
                res.append(",")
                res.append(self['edition'])
            if self.get("location"):
                res.append(", ")
                res.append(self['location'])
            elif self.get("address"):
                res.append(", ")
                res.append(self['address'])
            res.append(", %s %s" % (self.get('month', ""), self['year']))
            if not self.get('pages'):
                pass
            elif "-" in self['pages']:
                res.append(", pages %s" % self['pages'])
            else:
                res.append(", page %s" % self['pages'])
        elif self.type == 'article':
            res = ["In "]
            if self.get('journalurl'):
                res.append('<a href="%s">%s</a>' % (self['journalurl'],
                                                    self['journal']))
            else:
                res.append(self['journal'])
            if self.get('volume'):
                res.append(" %s" % self['volume'])
            if self.get('number'):
                res.append("(%s)" % self['number'])
            res.append(", %s %s" % (self.get('month', ""), self['year']))
            if not self.get('pages'):
                pass
            elif "-" in self['pages']:
                res.append(", pages %s" % self['pages'])
            else:
                res.append(", page %s" % self['pages'])
        elif self.type == 'techreport':
            res = ["%s %s %s" % (self['institution'],
                                 self.get('type', 'technical report'),
                                 self.get('number', ""))]
            if self.get('month') or self.get('year'):
                res.append(", %s %s" % (self.get('month', ''),
                                        self.get('year', '')))
        elif self.type == 'mastersthesis' or self.type == 'phdthesis':
            if self.get('type'):
                res = [self['type']]
            elif self.type == 'mastersthesis':
                res = ["Master's thesis"]
            else:
                res = ["Ph.D. thesis"]
thesis"] if self.get('school'): res.append(", %s" % (self['school'])) if self.get('month') or self.get('year'): res.append(", %s %s" % (self.get('month', ''), self.get('year', ''))) elif self.type == 'book': res = [self['publisher']] if self.get('year'): res.append(" ") res.append(self.get('year')) # res.append(", %s"%(self.get('year'))) if self.get('series'): res.append(",") res.append(self['series']) elif self.type == 'misc': res = [self['howpublished']] if self.get('month') or self.get('year'): res.append(", %s %s" % (self.get('month', ''), self.get('year', ''))) if not self.get('pages'): pass elif "-" in self['pages']: res.append(", pages %s" % self['pages']) else: res.append(", page %s" % self['pages']) else: res = ["<Odd type %s>" % self.type] res[0:0] = [""] res.append(".") bibtexurl = "./bibtex.html#%s" % url_untranslate(self.key) res.append((" " "(BibTeX entry)" "") % bibtexurl) return htmlize("".join(res)) def to_html(self, cache_path="./cache", base_url="."): """Return the HTML for this entry.""" imp = self.isImportant() draft = self.get('year') == 'forthcoming' if imp: res = ["
  • "] elif draft: res = ["

  • "] else: res = ["

  • "] if imp or not draft: # Add a picture of the rank # Only if year is known or paper important! r = rank.get_rank_html(self['title'], self.get('year'), update=False, base_url=base_url) if r is not None: res.append(r) res.append("%s"%( url_untranslate(self.key),htmlize(self['title']))) for cached in 0,1: availability = [] if not cached: for which in [ "amazon", "excerpt", "publisher" ]: key = "www_%s_url"%which if self.get(key): url=self[key] url = unTeXescapeURL(url) availability.append('%s' %(url,which)) cache_section = self.get('www_cache_section', ".") if cache_section not in config.CACHE_SECTIONS: if cache_section != ".": print >>sys.stderr, "Unrecognized cache section %s"%( cache_section) cache_section="." for key, name, ext in (('www_abstract_url', 'abstract','abstract'), ('www_html_url', 'HTML', 'html'), ('www_pdf_url', 'PDF', 'pdf'), ('www_ps_url', 'PS', 'ps'), ('www_txt_url', 'TXT', 'txt'), ('www_ps_gz_url', 'gzipped PS','ps.gz') ): if cached: #XXXX the URL needs to be relative to the absolute #XXXX cache path. url = smartJoin(cache_path,cache_section, "%s.%s"%(self.key,ext)) fname = smartJoin(config.OUTPUT_DIR, config.CACHE_DIR, cache_section, "%s.%s"%(self.key,ext)) if not os.path.exists(fname): continue else: url = self.get(key) if not url: continue url = unTeXescapeURL(url) url = url.replace('&', '&') availability.append('%s' %(url,name)) if availability: res.append([" ", " "][cached]) res.append("(") if cached: res.append("Cached: ") res.append(", ".join(availability)) res.append(")") res.append("
    by ") #res.append("\n\n" % self.parsedAuthor) htmlAuthors = [ a.htmlizeWithLink() for a in self.parsedAuthor ] if len(htmlAuthors) == 1: res.append(htmlAuthors[0]) elif len(htmlAuthors) == 2: res.append(" and ".join(htmlAuthors)) else: res.append(", ".join(htmlAuthors[:-1])) res.append(", and ") res.append(htmlAuthors[-1]) if res[-1][-1] != '.': res.append(".") res.append("
    \n") res.append(self.biblio_to_html()) res.append("·"%url_untranslate(self.key)) res.append("

    ") if self.get('www_remarks'): res.append("

    %s

    "%htmlize( self['www_remarks'])) if imp or draft: res.append("") res.append("
  • \n\n") return "".join(res)