package bibtex import ( "bytes" "log" "strings" "unicode" ) const sepChars = "-~" var lcommands map[string]bool = map[string]bool{ "aa": true, "oe": true, "l": true, "ae": true, "o": true, "ss": true, "i": true, "j": true, } var ucommands map[string]bool = map[string]bool{ "AA": true, "L": true, "OE": true, "AE": true, "O": true, } type Token struct { Sep rune // the separator preceding the token Text string } type NamePart []Token // a proto name before the parts have been separated in f,v,l,f type name []NamePart // abbreviate a string. return the first letter or special character func _abbrv(s string) string { reader := newReader(s) var buf bytes.Buffer for c := reader.readRune(); c != eof; c = reader.readRune() { switch { case c == '{': c = reader.readRune() if c == '\\' { buf.WriteString("{\\") buf.WriteString(reader.readBraces()) return buf.String() } else { reader.UnreadRune() } case unicode.IsLetter(c): buf.WriteRune(c) return buf.String() } } return buf.String() } func printSep(buf *bytes.Buffer, sep rune, last bool) { switch { case sep == '-', sep == '~': buf.WriteRune(sep) case last || length(buf.String()) <= 2: buf.WriteRune('~') default: buf.WriteRune(' ') } } // Format returns a string representation of NamePart p. // If abbrv the token will be abbreviated. If def is true, the default // inter-token will be used: this is either space or ~ depending on the length // of the token. If def is false sep is used as the inter-token separator. func (p NamePart) Format(abbrv bool, sep string, def bool) string { var buf bytes.Buffer for i, t := range p { if i > 0 { if abbrv { buf.WriteRune('.') } if def { printSep(&buf, t.Sep, i == len(p)-1) } else { buf.WriteString(sep) } } if abbrv { buf.WriteString(_abbrv(t.Text)) } else { buf.WriteString(t.Text) } } return buf.String() } func addNonEmptyToken(b *bytes.Buffer, t *Token, part *NamePart, c rune) { if b.Len() > 0 { t.Text = b.String() b.Reset() *part = append(*part, *t) *t = Token{Sep: c} } } func addNonEmptyPart(part *NamePart, name *name, t *Token, c rune) { if len(*part) > 0 || len(*name) == 0 { *name = append(*name, *part) } *part = nil *t = Token{Sep: c} } // tokenize a string and return a list of proto names, the parts of the proto // name are the result of splitting on commas, but further splitting is done // later func splitNames(s string) (names []name) { if len(s) == 0 { return nil } var name name // current name var part NamePart // current name Part Token := &Token{Sep: rune(0)} // current token bracelevel := 0 space := false // was the last non-special character a WS? var buf bytes.Buffer for _, c := range s { switch { case c == '{': buf.WriteRune(c) bracelevel += 1 case c == '}': buf.WriteRune(c) bracelevel -= 1 case unicode.IsSpace(c) && bracelevel == 0: if space && strings.ToLower(buf.String()) == "and" { buf.Reset() addNonEmptyPart(&part, &name, Token, c) names = append(names, name) name = nil } addNonEmptyToken(&buf, Token, &part, c) space = true case bracelevel == 0 && strings.ContainsRune(sepChars, c): space = false addNonEmptyToken(&buf, Token, &part, c) case bracelevel == 0 && c == ',': space = false addNonEmptyToken(&buf, Token, &part, c) // a name cannot have more than 3 proto parts, hence a comma // counts as a separation between parts only the first two times, // i.e. when len(name) == 0 or len(name) == 1 if len(name) <= 1 { addNonEmptyPart(&part, &name, Token, c) } default: buf.WriteRune(c) } } addNonEmptyToken(&buf, Token, &part, ',') addNonEmptyPart(&part, &name, Token, ',') names = append(names, name) return } type Name struct { First NamePart Von NamePart Last NamePart Jr NamePart } func (n Name) getPart(c rune) (p NamePart) { switch c { case 'f': p = n.First case 'v': p = n.Von case 'l': p = n.Last case 'j': p = n.Jr } return } // here the format string f is assumed to only contain one part func (n Name) formatPart(f string) string { f = f[1 : len(f)-1] r := newReader(f) var buf bytes.Buffer abbrv := true found := false def := true sep := "" for c := r.readRune(); c != eof; c = r.readRune() { switch { case strings.ContainsRune("fvljFVLJ", c) && !found: found = true typ := unicode.ToLower(c) if d := r.readRune(); unicode.ToLower(d) == typ { abbrv = false } else { r.UnreadRune() } if d := r.readRune(); d == '{' { def = false sep = "{" + r.readBraces() sep = sep[1 : len(sep)-1] } else { r.UnreadRune() } part := n.getPart(typ) if len(part) == 0 { return "" } s := part.Format(abbrv, sep, def) buf.WriteString(s) case unicode.IsLetter(c): log.Printf("Non valid char") default: buf.WriteRune(c) } } return dtie(buf.String()) } // handles discretionary ties func dtie(s string) string { if len(s) >= 2 && s[len(s)-1] == '~' && s[len(s)-2] != '~' { s = s[:len(s)-1] if length(s) <= 2 { s += "~" } else { s += " " } } return s } // Format formats the name n according to the format string s. func (n Name) Format(s string) string { r := newReader(s) var buf bytes.Buffer for c := r.readRune(); c != eof; c = r.readRune() { switch { case c == '{': fmt := "{" + r.readBraces() buf.WriteString(n.formatPart(fmt)) case c == '}': log.Printf("Unbalanced braced in format string %s", s) default: buf.WriteRune(c) } } return buf.String() } // the von part starts at the first non last lower-case taken func findVon(part NamePart) int { for i, t := range part { if isLower(t.Text) && i < len(part)-1 { return i } } return -1 } // reading tokens from the penultimate token, the boundary between von and last // is after the first lower token func splitVonLast(part NamePart) (NamePart, NamePart) { var i int if len(part) < 2 { return part[:0], part[:] } for i = len(part) - 2; i >= 0; i-- { if isLower(part[i].Text) { i += 1 break } } if i == -1 { return part[:0], part[:] } else { return part[:i], part[i:] } } // only when there is no von part and no comma // reading tokens from the last token, tokens are added to the last name if // they are connected to the following token by a hyphen func splitFirstLast(part NamePart) (NamePart, NamePart) { var i int if len(part) == 0 { return part[:0], part[:] } for i = len(part) - 1; i >= 0; i-- { if part[i].Sep != '-' { break } } if i == -1 { return part[:0], part[:] } else { return part[:i], part[i:] } } // SplitNames parses the string s and returns a list of names. func SplitNames(s string) (res []Name) { names := splitNames(s) var first, von, last, vonlast, jr NamePart for _, name := range names { first, von, last, vonlast, jr = nil, nil, nil, nil, nil if len(name) == 1 { i := findVon(name[0]) if i >= 0 { first = name[0][:i] vonlast = name[0][i:] von, last = splitVonLast(vonlast) } else { first, last = splitFirstLast(name[0]) } } else if len(name) == 2 { vonlast = name[0] von, last = splitVonLast(vonlast) first = name[1] } else if len(name) == 3 { vonlast = name[0] von, last = splitVonLast(vonlast) first = name[2] jr = name[1] } res = append(res, Name{first, von, last, jr}) } return }