package bibtex import ( "bufio" "bytes" "fmt" "io" "log" "os" "strconv" "strings" "unicode" ) const eof = rune(0) type scanner struct { *bufio.Reader lineno int colno int ch rune fname string buffer bytes.Buffer } func NewScanner(r io.Reader) *scanner { var name string switch v := r.(type) { case *os.File: name = v.Name() default: name = "" } return &scanner{Reader: bufio.NewReader(r), lineno: 1, colno: 0, fname: name} } func (s *scanner) Next() { s.colno += 1 ch, _, err := s.ReadRune() s.ch = ch if err != nil { panic(err) } if ch == '\n' { s.lineno += 1 s.colno = 0 } } func (s *scanner) Text() string { return s.buffer.String() } func (s *scanner) readToken(endChars string) { s.buffer.Reset() for !unicode.IsSpace(s.ch) && !strings.ContainsRune(endChars, s.ch) { s.buffer.WriteRune(s.ch) s.Next() } } type ParseError struct { line int col int msg string file string } func (e ParseError) Error() string { return fmt.Sprintf("%s:%d:%d error: %s", e.file, e.line, e.col, e.msg) } type parser struct { *Database *scanner } func NewParser(r io.Reader) *parser { strings := map[string]string{ "jan": "January", "feb": "February", "mar": "March", "apr": "April", "may": "May", "jun": "June", "jul": "July", "aug": "August", "sep": "September", "oct": "October", "nov": "November", "dec": "December", } res := make(map[string]Value, len(strings)) for key, s := range strings { res[key] = StringLiteral(s) } var preamble LiteralList db := &Database{Strings: res, Entries: make(map[string]*Entry), CrossRefs: make(map[string]int), Preamble: preamble} return &parser{scanner: NewScanner(r), Database: db} } func (p *parser) NewError(msg string) error { return ParseError{msg: msg, line: p.lineno, col: p.colno, file: p.fname} } func (p *parser) Warning(msg string) { log.Printf("%s:%d:%d warning: %s", p.fname, p.lineno, p.colno, msg) } const IllegalIdChars = "'{}()=\",#%" func (s *scanner) readIdentifier() { if unicode.IsDigit(s.ch) { s.buffer.Reset() return } else { s.readToken(IllegalIdChars) } } func (s *scanner) eatSpace() { for ; unicode.IsSpace(s.ch); s.Next() { } } func (s *scanner) readBraceLiteral() { s.buffer.Reset() s.Next() blevel := 0 for blevel > 0 || (blevel == 0 && s.ch != '}') { if s.ch == '{' { blevel += 1 } else if s.ch == '}' { blevel -= 1 } s.buffer.WriteRune(s.ch) s.Next() } s.Next() } func (s *scanner) readStringLiteral() { s.buffer.Reset() blevel := 0 s.Next() for blevel > 0 || (blevel == 0 && s.ch != '"') { if s.ch == '{' { blevel += 1 } else if s.ch == '}' { blevel -= 1 } s.buffer.WriteRune(s.ch) s.Next() } s.Next() } func (s *scanner) readNumber() { s.buffer.Reset() for unicode.IsDigit(s.ch) { s.buffer.WriteRune(s.ch) s.Next() } } func (p *parser) readLiteral() Value { switch { case p.ch == '{': p.readBraceLiteral() return BraceLiteral(p.Text()) case p.ch == '"': p.readStringLiteral() return StringLiteral(p.Text()) case unicode.IsDigit(p.ch): p.readNumber() n, err := strconv.Atoi(p.Text()) if err != nil { p.Warning(fmt.Sprintf("Couldn't parse number %q", p.Text())) } return NumberLiteral(n) default: p.readIdentifier() id := p.Text() if id == "" { panic(p.NewError("Expected an identifier")) } if v, in := p.Strings[strings.ToLower(id)]; in { return VarLiteral{id, &v} } else { p.Warning(fmt.Sprintf("Unknown string %q", id)) l := Value(StringLiteral(id)) return VarLiteral{id, &l} } } } func (p *parser) readValue() Value { var res LiteralList res = append(res, p.readLiteral()) for p.eatSpace(); p.ch == '#'; p.eatSpace() { p.Next() p.eatSpace() res = append(res, p.readLiteral()) } return res } func (p *parser) readIdValue() (string, Value) { p.readIdentifier() id := p.Text() if id == "" { panic(p.NewError("Expected an identifier")) } p.eatSpace() if p.ch != '=' { panic(p.NewError("Expected '='")) } p.Next() p.eatSpace() value := p.readValue() return id, value } func (p *parser) readOpen() rune { if p.ch == '(' { p.Next() return ')' } else if p.ch == '{' { p.Next() return '}' } else { panic(p.NewError("Expected '(' or '{'")) } } func (p *parser) readPreamble() { close := p.readOpen() p.eatSpace() prb := p.Preamble.(LiteralList) p.Preamble = append(prb, p.readValue()) p.eatSpace() if p.ch != close { panic(p.NewError(fmt.Sprintf("Expected %q", close))) } else { p.Next() } } func (p *parser) readString() { close := p.readOpen() p.eatSpace() id, value := p.readIdValue() id = strings.ToLower(id) if _, in := p.Strings[id]; in { p.Warning(fmt.Sprintf("String %q already defined, overwriting", id)) } else { p.SNames = append(p.SNames, id) } p.Strings[id] = value p.eatSpace() if p.ch != close { panic(p.NewError(fmt.Sprintf("Expected %q", close))) } else { p.Next() } } func (p *parser) checkCrossRef(id string, value Value) { if id != "crossref" { return } key := strings.ToLower(value.String()) if _, in := p.Entries[key]; in { p.Warning(fmt.Sprintf("Crossreference %q defined before being used", key)) } p.CrossRefs[key] += 1 } func (p *parser) readEntry(t string) { close := p.readOpen() p.eatSpace() if close == ')' { p.readToken(",") } else { p.readToken(",}") } key := strings.ToLower(p.Text()) entry := &Entry{} store := true if _, in := p.Entries[key]; in { p.Warning(fmt.Sprintf("Entry %q already defined, ignoring", key)) store = false } else { entry.Key = key entry.Type = t entry.Fields = make(map[string]Value) p.Entries[key] = entry p.EKeys = append(p.EKeys, key) } for p.eatSpace(); p.ch != close; p.eatSpace() { if p.ch != ',' { panic(p.NewError(fmt.Sprintf("Expected ',' or %q", close))) } p.Next() p.eatSpace() if p.ch == close { break } id, value := p.readIdValue() id = strings.ToLower(id) if _, in := entry.Fields[id]; in { p.Warning(fmt.Sprintf("Field %q already defined, ignoring", id)) } else if store { p.checkCrossRef(id, value) entry.Fields[id] = value entry.FNames = append(entry.FNames, id) } } p.Next() } func (p *parser) readDeclaration() { p.readIdentifier() p.eatSpace() t := p.Text() if t == "" { panic(p.NewError("Expected entry type")) } t = strings.ToLower(t) switch t { case "string": p.readString() case "preamble": p.readPreamble() case "comment": default: p.readEntry(t) } } func errorHandler(err *error, p *parser, strict bool) { if e := recover(); e != nil { switch er := e.(type) { case ParseError: *err = er if strict { return } else { log.Print(er.Error()) p.parse(strict) } case error: if e != io.EOF { panic(e) } default: panic(e) } } } func (p *parser) parse(strict bool) (err error) { defer errorHandler(&err, p, strict) for { if p.ch == '@' { p.Next() p.eatSpace() p.readDeclaration() } p.Next() } } func Parse(r io.Reader, strict bool) (db *Database, err error) { p := NewParser(r) err = p.parse(strict) db = p.Database return }