import rank
import sys
import re
import config
import os
from utils import htmlize, url_untranslate, unTeXescapeURL, smartJoin,\
    _split, ALLCHARS, PRINTINGCHARS
from author import parseAuthor

# Fields that we only care about for making web pages (BibTeX doesn't
# recognize them.)
WWW_FIELDS = ['www_section', 'www_important', 'www_remarks',
              'www_abstract_url', 'www_html_url', 'www_pdf_url', 'www_ps_url',
              'www_txt_url', 'www_ps_gz_url', 'www_amazon_url',
              'www_excerpt_url', 'www_publisher_url',
              'www_cache_section', 'www_tags']

PROCEEDINGS_RE = re.compile(
    r'((?:proceedings|workshop record) of(?: the)? )(.*)',
    re.I)

# List of fields that appear when we display the entries as BibTeX.
DISPLAYED_FIELDS = ['title', 'author', 'journal', 'booktitle',
                    'school', 'institution', 'organization', 'volume',
                    'number', 'year', 'month', 'address', 'location',
                    'chapter', 'edition', 'pages', 'editor', 'howpublished',
                    'key', 'publisher', 'type', 'note', 'series']


class BibTeXEntry:
    """A single BibTeX entry."""
    def __init__(self, type, key, fields):
        self.type = type  # What kind of entry is it?  (@book,@injournal,etc)
        self.key = key  # What key does it have?
        self.fields = fields  # Map from key to value.
        self.entryLine = 0  # Defined on this line number

    def get(self, k, v=None):
        return self.fields.get(k, v)

    def __contains__(self, k):
        return k in self.fields

    def __getitem__(self, k):
        return self.fields[k]

    def __delitem__(self, k):
        del self.fields[k]

    def __setitem__(self, k, v):
        self.fields[k] = v

    def __str__(self):
        return self.format(70, 1)

    def __iter__(self):
        return iter(self.fields.keys())

    def getURL(self):
        """Return the best URL to use for this paper, or None."""
        best = None
        for field in ['www_pdf_url', 'www_ps_gz_url', 'www_ps_url',
                      'www_html_url', 'www_txt_url', ]:
            u = self.get(field)
            if u:
                if not best:
                    best = u
                elif (best.startswith("http://citeseer.nj.nec.com/")
                      and not u.startswith("http://citeseer.nj.nec.com/")):
                    best = u
        return best

    def format(self, width=70, indent=8, v=0, invStrings={}):
        """Format this entry as BibTeX."""
        d = ["@%s{%s,\n" % (self.type, self.key)]
        if v:
            df = DISPLAYED_FIELDS[:]
            for k in self:
                if k not in df:
                    df.append(k)
        else:
            df = DISPLAYED_FIELDS
        for f in df:
            if f not in self:
                continue
            v = self[f]
            if v.startswith("<span class='bad'>"):
                d.append("%%%%% ERROR: Missing field\n")
                d.append("%% %s = {?????},\n" % f)
                continue
            np = v.translate(ALLCHARS, PRINTINGCHARS)
            if np:
                d.append("%%%%% "
                         + ("ERROR: Non-ASCII characters: '%r'\n" % np))
            d.append("  ")
            v = v.replace("&", "&amp;")
            if v in invStrings:
                s = "%s = %s,\n" % (f, invStrings[v])
            else:
                s = "%s = {%s},\n" % (f, v)
            d.append(_split(s, width, indent))
        d.append("}\n")
        return "".join(d)

    def resolve(self):
        """Handle post-processing for this entry"""
        a = self.get('author')
        if a:
            self.parsedAuthor = parseAuthor(a)
            # print a
            # print "   => ",repr(self.parsedAuthor)
        else:
            self.parsedAuthor = None

    def isImportant(self):
        """Return 1 iff this entry is marked as important"""
        imp = self.get("www_important")
        if imp and imp.strip().lower() not in ("no", "false", "0"):
            return 1
        return 0

    def check(self):
        """Print any errors for this entry, and return true if there were
           none."""
        errs = self._check()
        for e in errs:
            print e
        return not errs

    def _check(self):
        errs = []
        if self.type == 'inproceedings':
            fields = 'booktitle', 'year'
        elif self.type == 'incollection':
            fields = 'booktitle', 'year'
        elif self.type == 'proceedings':
            fields = 'booktitle', 'editor'
        elif self.type == 'article':
            fields = 'journal', 'year'
        elif self.type == 'techreport':
            fields = 'institution',
        elif self.type == 'misc':
            fields = 'howpublished',
        elif self.type in ('mastersthesis', 'phdthesis'):
            fields = ()
        else:
            fields = ()
            errs.append("ERROR: odd type %s" % self.type)
        if self.type != 'proceedings':
            fields += 'title', 'author', 'www_section', 'year'

        for field in fields:
            if self.get(field) is None or \
                    self.get(field).startswith("<span class='bad'>"):
                errs.append("ERROR: %s has no %s field" % (self.key, field))
                self[field] = "<span class='bad'>%s:??</span>" % field

        if self.type == 'inproceedings':
            if self.get("booktitle"):
                if not self['booktitle'].startswith("Proceedings of") and \
                   not self['booktitle'].startswith("{Proceedings of"):
                    errs.append("ERROR: %s's booktitle (%r) doesn't start with 'Proceedings of'" % (self.key, self['booktitle']))

        if "pages" in self and not re.search(r'\d+--\d+', self['pages']):
            errs.append("ERROR: Misformed pages in %s" % self.key)

        if self.type == 'proceedings':
            if self.get('title'):
                errs.append("ERROR: %s is a proceedings: it should have a booktitle, not a title." % self.key)

        for field, value in self.fields.items():
            if value.translate(ALLCHARS, PRINTINGCHARS):
                errs.append("ERROR: %s.%s has non-ASCII characters" % (
                    self.key, field))
            if field.startswith("www_") and field not in WWW_FIELDS:
                errs.append("ERROR: unknown www field %s" % field)
            if value.strip()[-1:] == '.' and \
                field not in ("notes", "www_remarks", "author"):
                errs.append("ERROR: %s.%s has an extraneous period" % (self.key,
                                                                       field))
        return errs

    def biblio_to_html(self):
        """Return the HTML for the citation portion of entry."""
        if self.type in ('inproceedings', 'incollection'):
            booktitle = self['booktitle']
            bookurl = self.get('bookurl')
            if bookurl:
                m = PROCEEDINGS_RE.match(booktitle)
                if m:
                    res = ["In the ", m.group(1),
                           '<a href="%s">' % bookurl, m.group(2), "</a>"]
                else:
                    res = ['In the <a href="%s">%s</a>' % (bookurl, booktitle)]
            else:
                res = ["In the ", booktitle]

            if self.get("edition"):
                res.append(",")
                res.append(self['edition'])
            if self.get("location"):
                res.append(", ")
                res.append(self['location'])
            elif self.get("address"):
                res.append(", ")
                res.append(self['address'])
            res.append(", %s %s" % (self.get('month', ""), self['year']))
            if not self.get('pages'):
                pass
            elif "-" in self['pages']:
                res.append(", pages&nbsp;%s" % self['pages'])
            else:
                res.append(", page&nbsp;%s" % self['pages'])
        elif self.type == 'article':
            res = ["In "]
            if self.get('journalurl'):
                res.append('<a href="%s">%s</a>' % (self['journalurl'],
                                                    self['journal']))
            else:
                res.append(self['journal'])
            if self.get('volume'):
                res.append(" <b>%s</b>" % self['volume'])
            if self.get('number'):
                res.append("(%s)" % self['number'])
            res.append(", %s %s" % (self.get('month', ""), self['year']))
            if not self.get('pages'):
                pass
            elif "-" in self['pages']:
                res.append(", pages&nbsp;%s" % self['pages'])
            else:
                res.append(", page&nbsp;%s" % self['pages'])
        elif self.type == 'techreport':
            res = ["%s %s %s" % (self['institution'],
                                 self.get('type', 'technical report'),
                                 self.get('number', ""))]
            if self.get('month') or self.get('year'):
                res.append(", %s %s" % (self.get('month', ''),
                                        self.get('year', '')))
        elif self.type == 'mastersthesis' or self.type == 'phdthesis':
            if self.get('type'):
                res = [self['type']]
            elif self.type == 'mastersthesis':
                res = ["Masters's thesis"]
            else:
                res = ["Ph.D. thesis"]
            if self.get('school'):
                res.append(", %s" % (self['school']))
            if self.get('month') or self.get('year'):
                res.append(", %s %s" % (self.get('month', ''),
                                        self.get('year', '')))
        elif self.type == 'book':
            res = [self['publisher']]
            if self.get('year'):
                res.append(" ")
                res.append(self.get('year'))
                # res.append(", %s"%(self.get('year')))
            if self.get('series'):
                res.append(",")
                res.append(self['series'])
        elif self.type == 'misc':
            res = [self['howpublished']]
            if self.get('month') or self.get('year'):
                res.append(", %s %s" % (self.get('month', ''),
                                        self.get('year', '')))
            if not self.get('pages'):
                pass
            elif "-" in self['pages']:
                res.append(", pages&nbsp;%s" % self['pages'])
            else:
                res.append(", page&nbsp;%s" % self['pages'])
        else:
            res = ["&lt;Odd type %s&gt;" % self.type]

        res[0:0] = ["<span class='biblio'>"]
        res.append(".</span>")

        bibtexurl = "./bibtex.html#%s" % url_untranslate(self.key)
        res.append((" <span class='availability'>"
                    "(<a href='%s'>BibTeX&nbsp;entry</a>)"
                    "</span>") % bibtexurl)
        return htmlize("".join(res))

    def to_html(self, cache_path="./cache", base_url="."):
        """Return the HTML for this entry."""
        imp = self.isImportant()
        draft = self.get('year') == 'forthcoming'
        if imp:
            res = ["<li><div class='impEntry'><p class='impEntry'>"]
        elif draft:
            res = ["<li><div class='draftEntry'><p class='draftEntry'>"]
        else:
            res = ["<li><p class='entry'>"]

        if imp or not draft:
            # Add a picture of the rank
            # Only if year is known or paper important!
            r = rank.get_rank_html(self['title'], self.get('year'),
                                   update=False, base_url=base_url)
            if r is not None:
                res.append(r)

        res.append("<span class='title'><a name='%s'>%s</a></span>"%(
            url_untranslate(self.key),htmlize(self['title'])))

        for cached in 0,1:
            availability = []
            if not cached:
                for which in [ "amazon", "excerpt", "publisher" ]:
                    key = "www_%s_url"%which
                    if self.get(key):
                        url=self[key]
                        url = unTeXescapeURL(url)
                        availability.append('<a href="%s">%s</a>' %(url,which))

            cache_section = self.get('www_cache_section', ".")
            if cache_section not in config.CACHE_SECTIONS:
                if cache_section != ".":
                    print >>sys.stderr, "Unrecognized cache section %s"%(
                        cache_section)
                    cache_section="."

            for key, name, ext in (('www_abstract_url', 'abstract','abstract'),
                                   ('www_html_url', 'HTML', 'html'),
                                   ('www_pdf_url', 'PDF', 'pdf'),
                                   ('www_ps_url', 'PS', 'ps'),
                                   ('www_txt_url', 'TXT', 'txt'),
                                   ('www_ps_gz_url', 'gzipped&nbsp;PS','ps.gz')
                                   ):
                if cached:
                    #XXXX the URL needs to be relative to the absolute
                    #XXXX cache path.
                    url = smartJoin(cache_path,cache_section,
                                    "%s.%s"%(self.key,ext))
                    fname = smartJoin(config.OUTPUT_DIR, config.CACHE_DIR,
                                      cache_section,
                                      "%s.%s"%(self.key,ext))
                    if not os.path.exists(fname): continue
                else:
                    url = self.get(key)
                    if not url: continue
                url = unTeXescapeURL(url)
                url = url.replace('&', '&amp;')
                availability.append('<a href="%s">%s</a>' %(url,name))

            if availability:
                res.append([" ", "&nbsp;"][cached])
                res.append("<span class='availability'>(")
                if cached: res.append("Cached:&nbsp;")
                res.append(",&nbsp;".join(availability))
                res.append(")</span>")

        res.append("<br /><span class='author'>by ")

        #res.append("\n<!-- %r -->\n" % self.parsedAuthor)
        htmlAuthors = [ a.htmlizeWithLink() for a in self.parsedAuthor ]

        if len(htmlAuthors) == 1:
            res.append(htmlAuthors[0])
        elif len(htmlAuthors) == 2:
            res.append(" and ".join(htmlAuthors))
        else:
            res.append(", ".join(htmlAuthors[:-1]))
            res.append(", and ")
            res.append(htmlAuthors[-1])

        if res[-1][-1] != '.':
            res.append(".")
        res.append("</span><br />\n")
        res.append(self.biblio_to_html())
        res.append("<a href='#%s'>&middot;</a>"%url_untranslate(self.key))
        res.append("</p>")

        if self.get('www_remarks'):
            res.append("<p class='remarks'>%s</p>"%htmlize(
                self['www_remarks']))

        if imp or draft:
            res.append("</div>")
        res.append("</li>\n\n")

        return "".join(res)