package bibtex import ( "bufio" "bytes" "fmt" "io" "log" "os" "strings" "unicode" ) type Entry struct { typ string key string fields map[string]string line int } func NewEntry(typ string) *Entry { return &Entry{typ: typ, fields: make(map[string]string)} } const eof = rune(0) type ParseError struct { line int col int msg string file string } func (e *ParseError) Error() string { return fmt.Sprintf("%s:%d:%d error: %s", e.file, e.line, e.col, e.msg) } type Parser struct { *bufio.Reader fname string strings map[string]string entries map[string]*Entry lineno int colno int preamble string lastread rune } func NewParser(f *os.File) *Parser { strings := map[string]string{ "jan": "January", "feb": "February", "mar": "March", "apr": "April", "may": "May", "jun": "June", "jul": "July", "aug": "August", "sep": "September", "oct": "October", "nov": "November", "dec": "December", } return &Parser{Reader: bufio.NewReader(f), strings: strings, entries: make(map[string]*Entry), lineno: 1, colno: 0, fname: f.Name()} } func (p *Parser) NewError(msg string) error { return &ParseError{msg: msg, line: p.lineno, col: p.colno, file: p.fname} } func (p *Parser) Warning(msg string) { log.Printf("%s:%d:%d warning: %s", p.fname, p.lineno, p.colno, msg) } func (p *Parser) read() rune { ch, _, err := p.ReadRune() if err == io.EOF { return eof } if ch == '\n' { p.lineno += 1 p.colno = 0 } p.colno += 1 p.lastread = ch return ch } func (p *Parser) peek() rune { ch, _, err := p.ReadRune() p.UnreadRune() if err == io.EOF { return eof } return ch } func (p *Parser) unread() { p.colno -= 1 if p.lastread == '\n' { p.lineno -= 1 } p.UnreadRune() } func (p *Parser) readToken(s string) string { var buf bytes.Buffer var ch rune for { ch = p.read() if strings.ContainsRune(s, ch) || unicode.IsSpace(ch) { p.unread() break } else { buf.WriteRune(ch) } } return buf.String() } const IllegalIdChars = "{}()=\",#%" func (p *Parser) readIdentifier() string { var buf bytes.Buffer var ch rune if ch = p.read(); unicode.IsDigit(ch) { p.unread() return "" } else if strings.ContainsRune(IllegalIdChars, ch) || unicode.IsSpace(ch) { p.unread() return "" } else { buf.WriteRune(ch) } for { ch = p.read() if strings.ContainsRune(IllegalIdChars, ch) || unicode.IsSpace(ch) { p.unread() break } else { buf.WriteRune(ch) } } return buf.String() } func (p *Parser) eatSpace() { var ch rune for { ch = p.read() if !unicode.IsSpace(ch) { p.unread() break } } } func (p *Parser) readBraceLiteral() (string, error) { var buf bytes.Buffer ch := p.read() if ch != '{' { p.unread() return "", p.NewError("Expected '{'") } blevel := 1 for { ch = p.read() if ch == '{' { blevel += 1 } else if ch == '}' { blevel -= 1 if blevel == 0 { break } } buf.WriteRune(ch) } return buf.String(), nil } func (p *Parser) readStringLiteral() (string, error) { var buf bytes.Buffer ch := p.read() if ch != '"' { p.unread() return "", p.NewError("Expected '\"'") } blevel := 0 for blevel >= 0 { ch = p.read() if ch == '{' { blevel += 1 } else if ch == '}' { blevel -= 1 } else if ch == '"' && blevel == 0 { break } buf.WriteRune(ch) } if blevel != 0 { return "", p.NewError("Unbalanced '{'") } return buf.String(), nil } func (p *Parser) readNumber() (string, error) { var buf bytes.Buffer var ch rune for { ch = p.read() if !unicode.IsDigit(ch) { p.unread() break } else { buf.WriteRune(ch) } } return buf.String(), nil } func (p *Parser) readLiteral() (string, error) { if ch := p.peek(); ch == '{' { return p.readBraceLiteral() } else if ch == '"' { return p.readStringLiteral() } else if unicode.IsDigit(ch) { return p.readNumber() } else { id := p.readIdentifier() if id == "" { return "", p.NewError("Expected an identifier") } if str, in := p.strings[strings.ToLower(id)]; in { return str, nil } else { p.Warning(fmt.Sprintf("Unknown string %q", id)) return "", nil } } } func (p *Parser) readValue() (string, error) { var buf bytes.Buffer var ch rune value, err := p.readLiteral() if err != nil { return "", err } buf.WriteString(value) for { p.eatSpace() if ch = p.read(); ch == '#' { p.eatSpace() value, err := p.readLiteral() if err != nil { return "", err } buf.WriteString(value) } else { p.unread() break } } return buf.String(), nil } func (p *Parser) readIdValue() (string, string, error) { p.eatSpace() id := p.readIdentifier() if id == "" { return "", "", p.NewError("Expected an identifier") } p.eatSpace() if ch := p.read(); ch != '=' { p.unread() return "", "", p.NewError("Expected '='") } p.eatSpace() value, err := p.readValue() if err != nil { return "", "", err } return id, value, nil } func (p *Parser) readOpen() (rune, error) { p.eatSpace() if ch := p.read(); ch == '(' { return ')', nil } else if ch == '{' { return '}', nil } else { p.unread() return ch, p.NewError("Expected '(' or '{'") } } func (p *Parser) readPreamble() error { close, err := p.readOpen() if err != nil { return err } p.eatSpace() val, err := p.readValue() if err != nil { return err } p.preamble += val p.eatSpace() if ch := p.read(); ch != close { p.unread() return p.NewError(fmt.Sprintf("Expected %q", close)) } return nil } func (p *Parser) readString() error { close, err := p.readOpen() if err != nil { return err } p.eatSpace() id, value, err := p.readIdValue() if err != nil { return err } if _, in := p.strings[strings.ToLower(id)]; in { p.Warning(fmt.Sprintf("String %q already defined, ignoring", id)) } else { p.strings[strings.ToLower(id)] = value } p.eatSpace() if ch := p.read(); ch != close { p.unread() return p.NewError(fmt.Sprintf("Expected %q", close)) } return nil } func (p *Parser) readEntry(entry *Entry) error { close, err := p.readOpen() if err != nil { return err } p.eatSpace() key := p.readToken("," + string(close)) entry.key = key if _, in := p.entries[strings.ToLower(key)]; in { p.Warning(fmt.Sprintf("Entry %q already defined, ignoring", key)) } else { p.entries[strings.ToLower(key)] = entry } for { p.eatSpace() if ch := p.read(); ch == close { return nil } else if ch == ',' { p.eatSpace() if ch := p.read(); ch == close { return nil } else { p.unread() id, value, err := p.readIdValue() if err != nil { return err } if _, in := entry.fields[strings.ToLower(id)]; in { p.Warning(fmt.Sprintf("Field %q already defined, ignoring", id)) } else { entry.fields[strings.ToLower(id)] = value } } } else { p.unread() return p.NewError(fmt.Sprintf("Expected ',' or %q", close)) } } return nil } func (p *Parser) readDeclaration() error { typ := p.readIdentifier() if typ == "" { return p.NewError("Expected entry type") } typ = strings.ToLower(typ) var err error switch typ { case "string": err = p.readString() case "preamble": err = p.readPreamble() case "comment": default: entry := NewEntry(typ) entry.line = p.lineno err = p.readEntry(entry) } return err } func (p *Parser) Parse(strict bool) error { var ch rune for { ch = p.read() if ch == '@' { p.eatSpace() err := p.readDeclaration() if err != nil { switch err.(type) { case *ParseError: if strict { return err } else { log.Println(err.Error()) } default: return err } } } else if ch == eof { break } } return nil } func main() { log.SetFlags(0) log.SetOutput(os.Stderr) p := NewParser(os.Stdin) err := p.Parse(false) if err != nil { log.Fatal(err) } for key, entry := range p.entries { fmt.Println(key, entry.typ) for name, value := range entry.fields { fmt.Println("\t", name, value) } } }