diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2016-02-15 20:45:25 -0500 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2016-02-15 20:45:25 -0500 |
| commit | 374e1677e65fe2217444c328ad13677430fcd903 (patch) | |
| tree | c5178a06562f6ff3a2357dac543cc261bb478648 | |
| download | bibtex-374e1677e65fe2217444c328ad13677430fcd903.tar.gz | |
Initial commit, rough implementation
| -rw-r--r-- | parser.go | 441 |
1 files changed, 441 insertions, 0 deletions
// Package main implements a rough BibTeX parser. It reads a database from
// standard input and prints every entry together with its fields.
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"log"
	"os"
	"sort"
	"strings"
	"unicode"
)

// Entry is a single bibliography entry such as @article{key, ...}.
type Entry struct {
	typ    string            // lower-cased entry type, e.g. "article"
	key    string            // citation key exactly as written in the source
	fields map[string]string // field values indexed by lower-cased field name
	line   int               // line on which the entry body starts
}

// NewEntry returns an empty entry of the given type with an initialized
// field map.
func NewEntry(typ string) *Entry {
	return &Entry{typ: typ, fields: make(map[string]string)}
}

// eof is the sentinel returned by Parser.read and Parser.peek once no more
// input is available. Because it is the NUL rune, a literal NUL byte in the
// input is indistinguishable from the end of the input.
const eof = rune(0)

// ParseError describes a syntax error together with the position at which it
// was detected.
type ParseError struct {
	line int
	col  int
	msg  string
	file string
}

// Error formats the error as "file:line:col error: message".
func (e *ParseError) Error() string {
	return fmt.Sprintf("%s:%d:%d error: %s", e.file, e.line, e.col, e.msg)
}

// Parser is a hand-written recursive-descent parser for BibTeX input. It
// tracks the current line and column so that errors and warnings can be
// located.
type Parser struct {
	*bufio.Reader
	fname    string
	strings  map[string]string // @string macros indexed by lower-cased name
	entries  map[string]*Entry // parsed entries indexed by lower-cased key
	lineno   int               // 1-based line of the last rune read
	colno    int               // 1-based column of the last rune read, 0 at line start
	prevcol  int               // column reached on the previous line, restored when a '\n' is unread
	preamble string            // concatenation of all @preamble values
	lastread rune              // last rune returned by read, eof once the input is exhausted
	err      error             // first non-EOF read error, reported by Parse
}

// newParser builds a parser reading from r. name is only used to prefix
// error and warning messages. The month abbreviations are predefined as
// string macros.
func newParser(r io.Reader, name string) *Parser {
	macros := map[string]string{
		"jan": "January", "feb": "February", "mar": "March", "apr": "April",
		"may": "May", "jun": "June", "jul": "July", "aug": "August",
		"sep": "September", "oct": "October", "nov": "November",
		"dec": "December",
	}
	return &Parser{
		Reader:  bufio.NewReader(r),
		strings: macros,
		entries: make(map[string]*Entry),
		lineno:  1,
		colno:   0,
		fname:   name,
	}
}

// NewParser returns a parser reading from f; the file name is used in
// diagnostics.
func NewParser(f *os.File) *Parser {
	return newParser(f, f.Name())
}

// NewError returns a *ParseError carrying msg and the parser's current
// position.
func (p *Parser) NewError(msg string) error {
	return &ParseError{msg: msg, line: p.lineno, col: p.colno, file: p.fname}
}

// Warning logs msg prefixed with the parser's current position.
func (p *Parser) Warning(msg string) {
	log.Printf("%s:%d:%d warning: %s", p.fname, p.lineno, p.colno, msg)
}

// noteErr remembers the first read error that is not io.EOF so that Parse
// can report it instead of silently treating it as the end of the input.
func (p *Parser) noteErr(err error) {
	if err != io.EOF && p.err == nil {
		p.err = err
	}
}

// read consumes and returns the next rune, updating the position counters.
// It returns eof when the input is exhausted or cannot be read.
func (p *Parser) read() rune {
	ch, _, err := p.ReadRune()
	if err != nil {
		p.noteErr(err)
		p.lastread = eof
		return eof
	}
	if ch == '\n' {
		// The newline ends the current line; it is not a column of the next.
		p.prevcol = p.colno
		p.lineno++
		p.colno = 0
	} else {
		p.colno++
	}
	p.lastread = ch
	return ch
}

// peek returns the next rune without consuming it, or eof when no more input
// is available.
func (p *Parser) peek() rune {
	ch, _, err := p.ReadRune()
	if err != nil {
		p.noteErr(err)
		return eof
	}
	// Cannot fail: the previous operation was a successful ReadRune.
	_ = p.UnreadRune()
	return ch
}

// unread pushes back the rune returned by the last call to read and restores
// the position counters. It does nothing when that call returned eof, since
// nothing was consumed.
func (p *Parser) unread() {
	if p.lastread == eof {
		return
	}
	if p.lastread == '\n' {
		p.lineno--
		p.colno = p.prevcol
	} else {
		p.colno--
	}
	// UnreadRune only fails when the previous operation was not a ReadRune,
	// in which case there is nothing to push back.
	_ = p.UnreadRune()
}

// readToken reads runes up to, but not including, the first white space
// character or rune contained in s. It also stops at the end of the input.
func (p *Parser) readToken(s string) string {
	var buf bytes.Buffer
	for {
		ch := p.read()
		if ch == eof {
			break
		}
		if strings.ContainsRune(s, ch) || unicode.IsSpace(ch) {
			p.unread()
			break
		}
		buf.WriteRune(ch)
	}
	return buf.String()
}

// IllegalIdChars lists the characters that cannot appear in an identifier.
const IllegalIdChars = "{}()=\",#%"

// readIdentifier reads an identifier: a run of runes that are neither white
// space nor in IllegalIdChars and that does not start with a digit. It
// returns "" without consuming anything when no identifier starts at the
// current position.
func (p *Parser) readIdentifier() string {
	isIDRune := func(ch rune) bool {
		return ch != eof && !unicode.IsSpace(ch) &&
			!strings.ContainsRune(IllegalIdChars, ch)
	}

	var buf bytes.Buffer
	ch := p.read()
	if unicode.IsDigit(ch) || !isIDRune(ch) {
		p.unread()
		return ""
	}
	buf.WriteRune(ch)

	for {
		ch = p.read()
		if !isIDRune(ch) {
			p.unread()
			break
		}
		buf.WriteRune(ch)
	}
	return buf.String()
}

// eatSpace skips any white space at the current position.
func (p *Parser) eatSpace() {
	for {
		if ch := p.read(); !unicode.IsSpace(ch) {
			p.unread()
			return
		}
	}
}

// readBraceLiteral reads a {...} literal and returns its content without the
// outer braces. Nested braces are kept verbatim. It fails when the literal
// does not start with '{' or when the input ends before the matching '}'.
func (p *Parser) readBraceLiteral() (string, error) {
	if ch := p.read(); ch != '{' {
		p.unread()
		return "", p.NewError("Expected '{'")
	}
	var buf bytes.Buffer
	depth := 1
	for {
		ch := p.read()
		switch ch {
		case eof:
			return "", p.NewError("Unbalanced '{'")
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return buf.String(), nil
			}
		}
		buf.WriteRune(ch)
	}
}

// readStringLiteral reads a "..." literal and returns its content without
// the quotes. A '"' nested inside braces does not terminate the literal. It
// fails when the literal does not start with '"', when braces are unbalanced
// or when the input ends before the closing quote.
func (p *Parser) readStringLiteral() (string, error) {
	if ch := p.read(); ch != '"' {
		p.unread()
		return "", p.NewError("Expected '\"'")
	}
	var buf bytes.Buffer
	depth := 0
	for {
		ch := p.read()
		switch {
		case ch == eof:
			if depth > 0 {
				return "", p.NewError("Unbalanced '{'")
			}
			return "", p.NewError("Expected '\"'")
		case ch == '{':
			depth++
		case ch == '}':
			depth--
			if depth < 0 {
				return "", p.NewError("Unbalanced '{'")
			}
		case ch == '"' && depth == 0:
			return buf.String(), nil
		}
		buf.WriteRune(ch)
	}
}

// readNumber reads a possibly empty run of digits. The error result is
// always nil; it exists so that the signature matches the other literal
// readers.
func (p *Parser) readNumber() (string, error) {
	var buf bytes.Buffer
	for {
		ch := p.read()
		if !unicode.IsDigit(ch) {
			p.unread()
			break
		}
		buf.WriteRune(ch)
	}
	return buf.String(), nil
}

// readLiteral reads one operand of a value: a braced literal, a quoted
// literal, a number or the name of a string macro. An unknown macro name
// yields a warning and the empty string.
func (p *Parser) readLiteral() (string, error) {
	switch ch := p.peek(); {
	case ch == '{':
		return p.readBraceLiteral()
	case ch == '"':
		return p.readStringLiteral()
	case unicode.IsDigit(ch):
		return p.readNumber()
	}

	id := p.readIdentifier()
	if id == "" {
		return "", p.NewError("Expected an identifier")
	}
	str, in := p.strings[strings.ToLower(id)]
	if !in {
		p.Warning(fmt.Sprintf("Unknown string %q", id))
		return "", nil
	}
	return str, nil
}

// readValue reads one or more literals joined by '#' and returns their
// concatenation.
func (p *Parser) readValue() (string, error) {
	var buf bytes.Buffer
	value, err := p.readLiteral()
	if err != nil {
		return "", err
	}
	buf.WriteString(value)
	for {
		p.eatSpace()
		if ch := p.read(); ch != '#' {
			p.unread()
			break
		}
		p.eatSpace()
		if value, err = p.readLiteral(); err != nil {
			return "", err
		}
		buf.WriteString(value)
	}
	return buf.String(), nil
}

// readIdValue reads an "identifier = value" pair and returns the identifier
// as written together with the value.
func (p *Parser) readIdValue() (string, string, error) {
	p.eatSpace()
	id := p.readIdentifier()
	if id == "" {
		return "", "", p.NewError("Expected an identifier")
	}
	p.eatSpace()
	if ch := p.read(); ch != '=' {
		p.unread()
		return "", "", p.NewError("Expected '='")
	}
	p.eatSpace()
	value, err := p.readValue()
	if err != nil {
		return "", "", err
	}
	return id, value, nil
}

// readOpen reads the delimiter opening a declaration body and returns the
// matching closing delimiter: ')' for '(' and '}' for '{'. On error the
// offending rune is pushed back and returned alongside the error.
func (p *Parser) readOpen() (rune, error) {
	p.eatSpace()
	switch ch := p.read(); ch {
	case '(':
		return ')', nil
	case '{':
		return '}', nil
	default:
		p.unread()
		return ch, p.NewError("Expected '(' or '{'")
	}
}

// readPreamble reads the body of an @preamble declaration and appends its
// value to the parser's preamble.
func (p *Parser) readPreamble() error {
	closer, err := p.readOpen()
	if err != nil {
		return err
	}
	p.eatSpace()
	val, err := p.readValue()
	if err != nil {
		return err
	}
	p.preamble += val
	p.eatSpace()
	if ch := p.read(); ch != closer {
		p.unread()
		return p.NewError(fmt.Sprintf("Expected %q", closer))
	}
	return nil
}

// readString reads the body of an @string declaration and records the macro
// it defines. Macro names are case-insensitive; a redefinition is ignored
// with a warning.
func (p *Parser) readString() error {
	closer, err := p.readOpen()
	if err != nil {
		return err
	}
	p.eatSpace()
	id, value, err := p.readIdValue()
	if err != nil {
		return err
	}
	name := strings.ToLower(id)
	if _, in := p.strings[name]; in {
		p.Warning(fmt.Sprintf("String %q already defined, ignoring", id))
	} else {
		p.strings[name] = value
	}
	p.eatSpace()
	if ch := p.read(); ch != closer {
		p.unread()
		return p.NewError(fmt.Sprintf("Expected %q", closer))
	}
	return nil
}

// readEntry reads the body of a regular entry into entry: the citation key
// followed by comma-separated "name = value" fields, with an optional
// trailing comma. Field names are case-insensitive; a duplicate field is
// ignored with a warning. Fields read before an error are kept in entry.
func (p *Parser) readEntry(entry *Entry) error {
	closer, err := p.readOpen()
	if err != nil {
		return err
	}
	p.eatSpace()
	entry.key = p.readToken("," + string(closer))
	for {
		p.eatSpace()
		switch ch := p.read(); ch {
		case closer:
			return nil
		case ',':
			p.eatSpace()
			if next := p.read(); next == closer {
				// Trailing comma right before the closing delimiter.
				return nil
			}
			p.unread()
			id, value, err := p.readIdValue()
			if err != nil {
				return err
			}
			name := strings.ToLower(id)
			if _, in := entry.fields[name]; in {
				p.Warning(fmt.Sprintf("Field %q already defined, ignoring",
					id))
			} else {
				entry.fields[name] = value
			}
		default:
			// Also reached at the end of the input, where read returns eof.
			p.unread()
			return p.NewError(fmt.Sprintf("Expected ',' or %q", closer))
		}
	}
}

// readDeclaration reads what follows an '@': the declaration type and its
// body. @string and @preamble update the parser state. Nothing is read after
// @comment; Parse simply resumes scanning for the next '@'. Any other type
// is parsed as an entry and recorded under its lower-cased key, unless an
// entry with that key already exists.
//
// An entry whose parsing failed is still recorded with the fields read so
// far, except when not even its key could be read.
func (p *Parser) readDeclaration() error {
	typ := p.readIdentifier()
	if typ == "" {
		return p.NewError("Expected entry type")
	}
	typ = strings.ToLower(typ)
	switch typ {
	case "string":
		return p.readString()
	case "preamble":
		return p.readPreamble()
	case "comment":
		return nil
	}

	entry := NewEntry(typ)
	entry.line = p.lineno
	err := p.readEntry(entry)
	if err != nil && entry.key == "" {
		// Nothing usable was parsed (for instance a stray '@' in free text):
		// do not register a bogus entry under the empty key.
		return err
	}
	key := strings.ToLower(entry.key)
	if _, in := p.entries[key]; in {
		p.Warning(fmt.Sprintf("Entry %q already defined, ignoring",
			entry.key))
	} else {
		p.entries[key] = entry
	}
	return err
}

// Parse reads the whole input. Everything outside of '@' declarations is
// ignored. When strict is true the first syntax error aborts parsing and is
// returned; otherwise syntax errors are logged and parsing resumes at the
// next '@'. Errors that are not a *ParseError always abort parsing. A read
// error other than io.EOF ends parsing and is returned.
func (p *Parser) Parse(strict bool) error {
	for {
		ch := p.read()
		if ch == eof {
			break
		}
		if ch != '@' {
			continue
		}
		p.eatSpace()
		err := p.readDeclaration()
		if err == nil {
			continue
		}
		if _, isSyntax := err.(*ParseError); !isSyntax || strict {
			return err
		}
		log.Println(err.Error())
	}
	return p.err
}

// main parses standard input leniently and prints every entry with its
// fields, sorted by key and by field name so the output is deterministic.
func main() {
	log.SetFlags(0)
	log.SetOutput(os.Stderr)
	p := NewParser(os.Stdin)
	if err := p.Parse(false); err != nil {
		log.Fatal(err)
	}

	keys := make([]string, 0, len(p.entries))
	for key := range p.entries {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	for _, key := range keys {
		entry := p.entries[key]
		fmt.Println(key, entry.typ)

		names := make([]string, 0, len(entry.fields))
		for name := range entry.fields {
			names = append(names, name)
		}
		sort.Strings(names)
		for _, name := range names {
			fmt.Println("\t", name, entry.fields[name])
		}
	}
}
