import sys
import config
import re
from utils import htmlize, txtize, ALLCHARS, PRINTINGCHARS


# Lowercase ASCII letters.  split_von() deletes these from a token and treats
# the token as a "von" word when nothing is left.
LC_CHARS = "abcdefghijklmnopqrstuvwxyz"

# Characters split_von() deletes from tokens that start with "{\" before
# classifying them: every ASCII letter (upper case first) plus "@".
SV_DELCHARS = LC_CHARS.upper() + LC_CHARS + "@"

# Matches a backslash followed by any single character.
RE_ESCAPED = re.compile(r'\\.')

def split_von(f,v,l,x):
    """Distribute the name tokens in 'x' over the lists 'f' (first), 'v' (von)
       and 'l' (last).  All four lists are modified in place; 'x' is
       consumed from the front.

       A token counts as a "von" word when nothing remains after deleting
       the characters in LC_CHARS from it.  Tokens starting with "{\\" first
       have the SV_DELCHARS characters, backslash escapes and braces
       stripped before that test.

       Tokens seen before the first von word go to 'f'.  The first non-von
       token after a von word, and everything following it, goes to 'l'.

       If the tokens run out before anything was put in 'l', the final
       token is moved there (from 'v' if a von word was seen, else from
       'f'), so the last name is only empty when 'x' was empty.
    """
    in_von = 0
    while x:
        tt = t = x[0]
        del x[0]
        if tt[:2] == '{\\':
            # Reduce the token to what is left once letters, '@', escaped
            # characters and braces are removed.
            tt = tt.translate(ALLCHARS, SV_DELCHARS)
            tt = RE_ESCAPED.sub("", tt)
            tt = tt.translate(ALLCHARS, "{}")
        if tt.translate(ALLCHARS, LC_CHARS) == "":
            v.append(t)
            in_von = 1
        elif in_von and f is not None:
            # First non-von token after the von part: it and all remaining
            # tokens form the last name.
            l.append(t)
            l.extend(x)
            return
        else:
            f.append(t)

    # The tokens ran out without reaching the branch that fills 'l'.
    if in_von:
        # Every token from the first von word onward was lower-case; the
        # final one becomes the last name rather than leaving it empty.
        l.append(v.pop())
    elif f:
        # No von part: the final token is the last name.  The guard keeps
        # an empty token list from raising IndexError.
        l.append(f.pop())

def buildAuthorTable(entries, debug=False):
    """Given a list of BibTeXEntry, return a map from parsed author name to
       parsed canonical name.

       Every entry of config.COLLAPSE_AUTHORS (alias string -> canonical
       string) is parsed with parseAuthor and seeds the map; those mappings
       are never overridden.  Each remaining author is folded, via
       collapsesTo, against every other author that has the same 'last'
       tokens, and maps to the result.

       If 'debug' is true, each mapping whose key differs from its value is
       written to stdout.
    """
    # Group authors by last name so that only plausible candidates are
    # compared against each other.
    authorsByLast = {}
    for e in entries:
        for a in e.parsedAuthor:
            authorsByLast.setdefault(tuple(a.last), []).append(a)

    # map from author to collapsed author.
    result = {}
    for k,v in config.COLLAPSE_AUTHORS.items():
        a = parseAuthor(k)[0]
        c = parseAuthor(v)[0]
        result[c] = c
        result[a] = c

    for e in entries:
        for author in e.parsedAuthor:
            if author in result:
                continue

            c = author
            for a in authorsByLast[tuple(author.last)]:
                if a is author:
                    continue
                c = c.collapsesTo(a)
            result[author] = c

    if debug:
        for a,c in result.items():
            # Use == rather than !=: only __eq__ is guaranteed to compare
            # authors by value.
            if not (a == c):
                sys.stdout.write("Collapsing authors: %s => %s\n" % (a,c))

    return result

class ParsedAuthor:
    """The parsed name of an author, split into first, von, last and jr
       parts.

       Eddie deserves credit for this incredibly hairy business.

       'first', 'von', 'last' and 'jr' are each a list of name tokens.
       'html' and 'txt' hold the full name as rendered by htmlize() and
       txtize().  'collapsable' is 0 when the htmlized name matches one of
       the patterns in config.NO_COLLAPSE_AUTHORS_RE_LIST, and 1 otherwise.
    """
    def __init__(self, first, von, last, jr):
        self.first = first
        self.von = von
        self.last = last
        self.jr = jr
        self.collapsable = 1

        self.html = htmlize(str(self))
        self.txt = txtize(str(self))

        s = self.html
        for pat in config.NO_COLLAPSE_AUTHORS_RE_LIST:
            if pat.search(s):
                self.collapsable = 0
                break

    def __eq__(self, o):
        """Two authors are equal when all four name parts are equal."""
        if not isinstance(o, ParsedAuthor):
            return NotImplemented
        return ((self.first == o.first) and
                (self.last  == o.last) and
                (self.von   == o.von) and
                (self.jr    == o.jr))

    def __ne__(self, o):
        """Inverse of __eq__; Python 2 does not derive != from __eq__."""
        equal = self.__eq__(o)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        # repr() is built from exactly the fields that __eq__ compares.
        return hash(repr(self))

    def _mergeToken(self, a, b):
        """Return the fuller of two aligned first-name tokens, or None if
           they cannot denote the same name.  A two-character initial such
           as "F." matches any token that starts with the same character.
        """
        if a == b:
            return a
        elif len(a) == 2 and a[1] == '.' and a[0] == b[0]:
            return b
        elif len(b) == 2 and b[1] == '.' and a[0] == b[0]:
            return a
        return None

    def collapsesTo(self, o):
        """Return 'o' if it could be a more canonical version of this
           author, and self otherwise.

           'o' is only returned when both authors are collapsable, their
           von, last and jr parts are equal, and either this author has no
           first names or merging the two first-name lists (expanding
           initials, and filling in names one side lacks) yields exactly
           o.first and not self.first.
        """
        if not self.collapsable or not o.collapsable:
            return self

        if self.last != o.last or self.von != o.von or self.jr != o.jr:
            return self
        if not self.first:
            return o

        if len(self.first) == len(o.first):
            # Same number of tokens: align them one to one.
            longer = self.first
            shorter = o.first
            idx = 0
        else:
            # Refuse to align lists of different length unless at least
            # one token is longer than an initial.
            realname = max([len(n) for n in self.first+o.first])>2
            if not realname:
                return self

            if len(self.first) < len(o.first):
                shorter = self.first; longer = o.first
            else:
                shorter = o.first; longer = self.first

            # Find where the shorter list's initials occur as a contiguous
            # run inside the longer list's initials.
            initials_s = "".join([n[0] for n in shorter])
            initials_l = "".join([n[0] for n in longer])
            idx = initials_l.find(initials_s)
            if idx < 0:
                return self

        merged = list(longer[:idx])
        for i in range(len(shorter)):
            token = self._mergeToken(longer[idx+i], shorter[i])
            if token is None:
                return self
            merged.append(token)
        merged += longer[idx+len(shorter):]

        # Only defer to 'o' when the merged name is exactly o's and is not
        # already our own.
        if merged != self.first and merged == o.first:
            return o
        return self

    def __repr__(self):
        return "ParsedAuthor(%r,%r,%r,%r)"%(self.first,self.von,
                                            self.last,self.jr)
    def __str__(self):
        """Return the name in "first von last[, jr]" order."""
        a = " ".join(self.first+self.von+self.last)
        if self.jr:
            # jr is a list of tokens; join it instead of formatting the
            # list itself.
            return "%s, %s" % (a," ".join(self.jr))
        return a

    def getHomepage(self):
        """Return the URL paired with the first pattern in
           config.AUTHOR_RE_LIST that matches this author's HTML name, or
           None if no pattern matches."""
        s = self.html
        for pat, url in config.AUTHOR_RE_LIST:
            if pat.search(s):
                return url
        return None

    def getSortingName(self):
        """Return a representation of this author's name in von-last-first-jr
           order, unless overridden by a matching pattern in
           config.ALPHABETIZE_AUTHOR_AS_RE_LIST, in which case the value
           paired with that pattern is returned."""
        s = self.html
        for pat,v in config.ALPHABETIZE_AUTHOR_AS_RE_LIST:
            if pat.search(s):
                return v

        return txtize(" ".join(self.von+self.last+self.first+self.jr))

    def getSectionName(self):
        """Return a HTML representation of this author's name in
           last, first von, jr order"""
        secname = " ".join(self.last)
        more = self.first+self.von
        if more:
            secname += ", "+" ".join(more)
        if self.jr:
            secname += ", "+" ".join(self.jr)
        secname = htmlize(secname)
        return secname

    def htmlizeWithLink(self):
        """Return this author's HTML name, wrapped in a link to the
           homepage from getHomepage() when there is one."""
        a = self.html
        u = self.getHomepage()
        if u:
            return "<a href='%s'>%s</a>"%(u,a)
        else:
            return a


def parseAuthor(s):
    """Parse the author string 's' and return a list of ParsedAuthor.

       Wrapper around _parseAuthor: if parsing raises an exception, the
       offending string is reported on stderr and the exception is
       re-raised unchanged.
    """
    try:
        return _parseAuthor(s)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not reported as internal errors.  The
        # one-element tuple keeps a tuple-valued 's' from being unpacked
        # by the % operator.
        sys.stderr.write("Internal error while parsing author %r\n" % (s,))
        raise

def _parseAuthor(s):
    """Take an author string and return a list of ParsedAuthor.

       The string is split into tokens at spaces, tabs, newlines and commas
       that lie outside braces; each comma is kept as a token of its own.
       The token 'and' separates one author from the next.  Within an
       author:

         * no comma:   "First von Last"
         * one comma:  "von Last, First"
         * more commas: the tokens between the first and the last comma
           form the jr part, and the tokens after the last comma are the
           first names.

       Author slots that contain no tokens at all (an empty string, or two
       consecutive 'and's) are skipped.
    """
    # Tokenize.
    items = []
    s = s.strip()
    while s:
        bracelevel = 0
        end = len(s)            # token runs to the end unless we break
        for i, ch in enumerate(s):
            if ch == '{':
                bracelevel += 1
            elif ch == '}':
                bracelevel -= 1
            elif bracelevel <= 0 and ch in " \t\n,":
                end = i
                break
        token = s[:end]
        if token:
            # A comma directly after whitespace yields an empty token;
            # dropping it keeps it from being classified as a name word.
            items.append(token)
        if s[end:end+1] == ',':
            items.append(',')
        s = s[end+1:].strip()

    # Group the tokens by author.
    authors = [[]]
    for item in items:
        if item == 'and':
            authors.append([])
        else:
            authors[-1].append(item)

    parsedAuthors = []
    # Split into first, von, last, jr
    for author in authors:
        if not author:
            continue

        commas = 0
        fvl = []    # tokens seen before any comma
        vl = []     # the "von Last" tokens, once a comma has been seen
        f = []
        v = []
        l = []
        j = []
        cur = fvl
        for item in author:
            if item == ',':
                if commas == 0:
                    vl = fvl
                    fvl = []
                    cur = f
                else:
                    # What was collected since the previous comma was the
                    # jr part, not the first names.
                    j.extend(f)
                    cur = f = []
                commas += 1
            else:
                cur.append(item)

        if commas == 0:
            split_von(f,v,l,fvl)
        elif vl:
            # In comma form nothing before the comma is a first name, so
            # the words split_von files as "first" are put back in front
            # of the von part if there is one, else in front of the last
            # name, instead of being discarded.
            lead = []
            split_von(lead,v,l,vl)
            if lead:
                if v:
                    v[:0] = lead
                else:
                    l[:0] = lead

        parsedAuthors.append(ParsedAuthor(f,v,l,j))

    return parsedAuthors