package bibtex

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"log"
	"os"
	"strconv"
	"strings"
	"unicode"
)

// eof is the sentinel rune returned by peek and readUnsafe when nothing more
// could be read from the input.
const eof = rune(0)

// ParseError is a syntax (or read) error annotated with the position at which
// the parser detected it.
type ParseError struct {
	line int
	col  int
	msg  string
	file string
}

// Error renders the error as "file:line:col error: msg".
func (e ParseError) Error() string {
	return fmt.Sprintf("%s:%d:%d error: %s", e.file, e.line, e.col, e.msg)
}

// parser reads runes from the embedded bufio.Reader, tracks the current
// line/column for diagnostics, and stores what it parses in the embedded
// Database.
type parser struct {
	*bufio.Reader
	*Database
	fname    string // name used in diagnostics; empty unless the input is an *os.File
	lineno   int
	colno    int
	lastread rune // last rune consumed by read/readUnsafe; consulted by unread
}

// NewParser returns a parser reading from r. Its Database starts with the
// month abbreviations ("jan" ... "dec") predefined as strings. When r is an
// *os.File, the file name is used as prefix in errors and warnings.
func NewParser(r io.Reader) *parser {
	// Named "months" rather than "strings" so the strings package is not shadowed.
	months := map[string]string{
		"jan": "January",
		"feb": "February",
		"mar": "March",
		"apr": "April",
		"may": "May",
		"jun": "June",
		"jul": "July",
		"aug": "August",
		"sep": "September",
		"oct": "October",
		"nov": "November",
		"dec": "December",
	}

	var name string
	if f, ok := r.(*os.File); ok {
		name = f.Name()
	}

	res := make(map[string]Value, len(months))
	for key, s := range months {
		res[key] = []Literal{StringLiteral(s)}
	}

	db := &Database{Strings: res, Entries: make(map[string]Entry)}
	return &parser{
		Reader:   bufio.NewReader(r),
		lineno:   1,
		colno:    0,
		fname:    name,
		Database: db,
	}
}

// NewError returns a ParseError carrying msg and the parser's current position.
func (p *parser) NewError(msg string) error {
	return ParseError{msg: msg, line: p.lineno, col: p.colno, file: p.fname}
}

// Warning logs msg through the standard logger, prefixed with the parser's
// current position.
func (p *parser) Warning(msg string) {
	log.Printf("%s:%d:%d warning: %s", p.fname, p.lineno, p.colno, msg)
}

// readUnsafe consumes one rune and updates the position. Read errors
// (including io.EOF) are ignored on purpose: the zero rune that ReadRune
// yields in that case equals eof, which is how Parse detects the end of input.
func (p *parser) readUnsafe() rune {
	ch, _, _ := p.ReadRune()
	if ch == '\n' {
		p.lineno++
		p.colno = 0
	}
	p.colno++
	p.lastread = ch
	return ch
}

// read consumes one rune and updates the position. It never returns on
// failure: at end of input it panics with io.EOF, and on any other read error
// it panics with a ParseError. Both are turned into ordinary errors by
// errorHandler.
func (p *parser) read() rune {
	ch, _, err := p.ReadRune()
	if err == io.EOF {
		panic(err)
	}
	if err != nil {
		// Without this, a persistent I/O failure would hand an endless
		// stream of zero runes to loops such as readBraceLiteral.
		panic(p.NewError(fmt.Sprintf("Read error: %v", err)))
	}
	if ch == '\n' {
		p.lineno++
		p.colno = 0
	}
	p.colno++
	p.lastread = ch
	return ch
}

// peek returns the next rune without consuming it, or eof at end of input.
func (p *parser) peek() rune {
	ch, _, err := p.ReadRune()
	// The result is ignored deliberately: after a failed ReadRune there is
	// nothing to push back and UnreadRune merely reports an error.
	p.UnreadRune()
	if err == io.EOF {
		return eof
	}
	return ch
}

// unread pushes the rune most recently returned by read/readUnsafe back onto
// the input and rewinds the position counters by one step.
func (p *parser) unread() {
	p.colno--
	if p.lastread == '\n' {
		p.lineno--
	}
	p.UnreadRune()
}

// readToken reads runes up to, but not including, the first white space rune
// or rune contained in s, and returns them.
func (p *parser) readToken(s string) string {
	var buf bytes.Buffer
	for {
		ch := p.read()
		if strings.ContainsRune(s, ch) || unicode.IsSpace(ch) {
			p.unread()
			return buf.String()
		}
		buf.WriteRune(ch)
	}
}

// IllegalIdChars lists the runes that terminate an identifier (white space
// terminates one as well).
const IllegalIdChars = "{}()=\",#%"

// readIdentifier reads an identifier: a run of runes that are neither white
// space nor in IllegalIdChars and that does not start with a digit. It returns
// "" without consuming anything when no identifier starts at the current
// position.
func (p *parser) readIdentifier() string {
	var buf bytes.Buffer

	ch := p.read()
	if unicode.IsDigit(ch) || strings.ContainsRune(IllegalIdChars, ch) || unicode.IsSpace(ch) {
		p.unread()
		return ""
	}
	buf.WriteRune(ch)

	for {
		ch = p.read()
		if strings.ContainsRune(IllegalIdChars, ch) || unicode.IsSpace(ch) {
			p.unread()
			return buf.String()
		}
		buf.WriteRune(ch)
	}
}

// eatSpace consumes white space up to the next non-space rune.
func (p *parser) eatSpace() {
	for {
		if ch := p.read(); !unicode.IsSpace(ch) {
			p.unread()
			return
		}
	}
}

// readBraceLiteral reads a '{'-delimited literal and returns the text between
// the outer braces; nested braces are kept in the text. It panics with a
// ParseError when the next rune is not '{'.
func (p *parser) readBraceLiteral() BraceLiteral {
	if ch := p.read(); ch != '{' {
		p.unread()
		panic(p.NewError("Expected '{'"))
	}

	var buf bytes.Buffer
	depth := 1
	for {
		ch := p.read()
		switch ch {
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				// The closing outer brace is not part of the literal.
				return BraceLiteral(buf.String())
			}
		}
		buf.WriteRune(ch)
	}
}

// readStringLiteral reads a '"'-delimited literal and returns the text between
// the quotes. A quote inside braces does not terminate the literal. It panics
// with a ParseError when the next rune is not '"' or when a '}' appears
// without a matching '{'.
func (p *parser) readStringLiteral() StringLiteral {
	if ch := p.read(); ch != '"' {
		p.unread()
		panic(p.NewError("Expected '\"'"))
	}

	var buf bytes.Buffer
	depth := 0
	// The loop ends either on the closing quote (depth == 0) or as soon as
	// depth goes negative, which is reported below.
	for depth >= 0 {
		ch := p.read()
		if ch == '{' {
			depth++
		} else if ch == '}' {
			depth--
		} else if ch == '"' && depth == 0 {
			break
		}
		buf.WriteRune(ch)
	}
	if depth != 0 {
		panic(p.NewError("Unbalanced '{'"))
	}
	return StringLiteral(buf.String())
}

// readNumber reads a run of digits and converts it with strconv.Atoi. If the
// conversion fails a warning is logged and the zero value is returned.
func (p *parser) readNumber() NumberLiteral {
	var buf bytes.Buffer
	for {
		ch := p.read()
		if !unicode.IsDigit(ch) {
			p.unread()
			break
		}
		buf.WriteRune(ch)
	}

	n, err := strconv.Atoi(buf.String())
	if err != nil {
		p.Warning(fmt.Sprintf("Couldn't parse number %q", buf.String()))
	}
	return NumberLiteral(n)
}

// readLiteral reads one literal, choosing its kind from the next rune: '{'
// starts a brace literal, '"' a string literal, a digit a number; anything
// else must be an identifier naming a string. The name is lower-cased; an
// undefined name only produces a warning.
func (p *parser) readLiteral() Literal {
	switch ch := p.peek(); {
	case ch == '{':
		return p.readBraceLiteral()
	case ch == '"':
		return p.readStringLiteral()
	case unicode.IsDigit(ch):
		return p.readNumber()
	}

	id := strings.ToLower(p.readIdentifier())
	if id == "" {
		panic(p.NewError("Expected an identifier"))
	}
	v, in := p.Strings[id]
	if !in {
		// v is the zero Value here; the literal still records the name.
		p.Warning(fmt.Sprintf("Unknown string %q", id))
	}
	return VariableLiteral{id, &v}
}

// readValue reads one or more literals joined by '#'.
func (p *parser) readValue() (res Value) {
	res = append(res, p.readLiteral())
	for {
		p.eatSpace()
		if ch := p.read(); ch != '#' {
			p.unread()
			return res
		}
		p.eatSpace()
		res = append(res, p.readLiteral())
	}
}

// readIdValue reads "identifier = value" and returns both parts. It panics
// with a ParseError when the identifier or the '=' is missing.
func (p *parser) readIdValue() (string, Value) {
	p.eatSpace()
	id := p.readIdentifier()
	if id == "" {
		panic(p.NewError("Expected an identifier"))
	}

	p.eatSpace()
	if ch := p.read(); ch != '=' {
		p.unread()
		panic(p.NewError("Expected '='"))
	}

	p.eatSpace()
	return id, p.readValue()
}

// readOpen consumes the opening delimiter of a declaration body and returns
// the matching closing delimiter: ')' for '(' and '}' for '{'. Anything else
// is a ParseError.
func (p *parser) readOpen() rune {
	p.eatSpace()
	switch p.read() {
	case '(':
		return ')'
	case '{':
		return '}'
	}
	p.unread()
	panic(p.NewError("Expected '(' or '{'"))
}

// readPreamble reads the body of a preamble declaration and appends its value
// to the database's Preamble.
func (p *parser) readPreamble() {
	closer := p.readOpen()
	p.eatSpace()
	p.Preamble = append(p.Preamble, p.readValue())
	p.eatSpace()
	if ch := p.read(); ch != closer {
		p.unread()
		panic(p.NewError(fmt.Sprintf("Expected %q", closer)))
	}
}

// readString reads the body of a string declaration and stores it under its
// lower-cased name. A name that is already defined is kept as is and a
// warning is logged.
func (p *parser) readString() {
	closer := p.readOpen()
	p.eatSpace()

	id, value := p.readIdValue()
	id = strings.ToLower(id)
	if _, in := p.Strings[id]; in {
		p.Warning(fmt.Sprintf("String %q already defined, ignoring", id))
	} else {
		p.Strings[id] = value
		p.SNames = append(p.SNames, id)
	}

	p.eatSpace()
	if ch := p.read(); ch != closer {
		p.unread()
		panic(p.NewError(fmt.Sprintf("Expected %q", closer)))
	}
}

// readEntry reads the body of an entry of type t: a key followed by
// comma-separated "name = value" fields, with an optional trailing comma.
// Entries are stored under their lower-cased key and field names are
// lower-cased; duplicates of either are ignored with a warning.
func (p *parser) readEntry(t string) {
	closer := p.readOpen()
	p.eatSpace()

	key := p.readToken("," + string(closer))
	entry := Entry{
		Key:    key,
		Type:   t,
		Fields: make(map[string]Value),
	}

	key = strings.ToLower(key)
	if _, in := p.Entries[key]; in {
		// NOTE: the body of the duplicate is not consumed here; the caller's
		// scan for the next '@' passes over it.
		p.Warning(fmt.Sprintf("Entry %q already defined, ignoring", key))
		return
	}

	for {
		p.eatSpace()
		ch := p.read()
		if ch == closer {
			break
		}
		if ch != ',' {
			p.unread()
			panic(p.NewError(fmt.Sprintf("Expected ',' or %q", closer)))
		}

		p.eatSpace()
		if p.read() == closer {
			// Trailing comma before the closing delimiter.
			break
		}
		p.unread()

		id, value := p.readIdValue()
		id = strings.ToLower(id)
		if _, in := entry.Fields[id]; in {
			p.Warning(fmt.Sprintf("Field %q already defined, ignoring", id))
			continue
		}
		entry.Fields[id] = value
		entry.FNames = append(entry.FNames, id)
	}

	p.Entries[key] = entry
	p.EKeys = append(p.EKeys, key)
}

// readDeclaration parses the declaration that follows an '@': a string, a
// preamble, a comment (whose body is not parsed) or a regular entry. Panics
// raised by the read helpers are converted into the returned error by
// errorHandler.
func (p *parser) readDeclaration() (err error) {
	defer errorHandler(&err, p)

	// Skipping white space must happen under the deferred handler: at end of
	// input eatSpace panics with io.EOF, which would otherwise escape to the
	// caller of Parse.
	p.eatSpace()

	t := p.readIdentifier()
	if t == "" {
		// Stop here; carrying on would register an entry with an empty type.
		return p.NewError("Expected entry type")
	}

	switch t = strings.ToLower(t); t {
	case "string":
		p.readString()
	case "preamble":
		p.readPreamble()
	case "comment":
		// Nothing to read; the body is skipped by the caller's scan for '@'.
	default:
		p.readEntry(t)
	}
	return nil
}

// errorHandler is meant to be deferred. It recovers a panic carrying a
// ParseError or io.EOF and stores it in *errp; any other panic value is
// re-raised unchanged.
func errorHandler(errp *error, p *parser) {
	e := recover()
	if e == nil {
		return
	}
	switch v := e.(type) {
	case ParseError:
		*errp = v
	case error:
		if v != io.EOF {
			panic(e)
		}
		*errp = v
	default:
		panic(e)
	}
}

// Parse reads BibTeX declarations from r. Everything outside a declaration
// (that is, up to the next '@') is skipped.
//
// In strict mode the first failing declaration aborts parsing and Parse
// returns a nil database together with the error. Otherwise the error is
// logged and parsing continues; the database is returned at end of input and
// err holds the result of the most recently parsed declaration.
func Parse(r io.Reader, strict bool) (db *Database, err error) {
	p := NewParser(r)
	for {
		switch p.readUnsafe() {
		case '@':
			err = p.readDeclaration()
			if err != nil {
				if strict {
					return nil, err
				}
				log.Print(err.Error())
			}
		case eof:
			return p.Database, err
		}
	}
}