diff options
| -rw-r--r-- | database.go | 18 | ||||
| -rw-r--r-- | parser.go | 435 | ||||
| -rw-r--r-- | utils.go | 4 |
3 files changed, 191 insertions, 266 deletions
diff --git a/database.go b/database.go index 945f9eb..ddb9340 100644 --- a/database.go +++ b/database.go @@ -115,12 +115,12 @@ func (e Entry) Marshal() string { } type Database struct { - SNames []string `json:"-"` - Strings map[string]Value `json:"strings,omitempty"` - Entries map[string]Entry `json:"entries"` - Preamble Value `json:"preamble,omitempty"` - EKeys []string `json:"-"` - CrossRefs map[string]int `json:"crossrefs,omitempty"` + SNames []string `json:"-"` + Strings map[string]Value `json:"strings,omitempty"` + Entries map[string]*Entry `json:"entries"` + Preamble Value `json:"preamble,omitempty"` + EKeys []string `json:"-"` + CrossRefs map[string]int `json:"crossrefs,omitempty"` } func (d *Database) UnTex() { @@ -158,14 +158,16 @@ func (d *Database) SplitNames() { func (d *Database) Resolve() { var key string - var ref Entry for _, entry := range d.Entries { for field, value := range entry.Fields { if field != "crossref" { continue } key = strings.ToLower(value.String()) - ref = d.Entries[key] + ref, in := d.Entries[key] + if !in { + continue + } for f, v := range ref.Fields { if _, in := entry.Fields[f]; !in { entry.Fields[f] = v @@ -14,6 +14,51 @@ import ( const eof = rune(0) +type scanner struct { + *bufio.Reader + lineno int + colno int + ch rune + fname string + buffer bytes.Buffer +} + +func NewScanner(r io.Reader) *scanner { + var name string + switch v := r.(type) { + case *os.File: + name = v.Name() + default: + name = "" + } + return &scanner{Reader: bufio.NewReader(r), lineno: 1, colno: 0, + fname: name} +} + +func (s *scanner) Next() { + s.colno += 1 + ch, _, err := s.ReadRune() + s.ch = ch + if err != nil { + panic(err) + } + if ch == '\n' { + s.lineno += 1 + s.colno = 0 + } +} + +func (s *scanner) Text() string { + return s.buffer.String() +} + +func (s *scanner) readToken(endChars string) { + s.buffer.Reset() + for !unicode.IsSpace(s.ch) && !strings.ContainsRune(endChars, s.ch) { + s.buffer.WriteRune(s.ch) + s.Next() + } +} type ParseError struct { line int col int @@ -26,12 +71,8 @@ func (e ParseError) Error() string { } type parser struct { - *bufio.Reader *Database - fname string - lineno int - colno int - lastread rune + *scanner } func NewParser(r io.Reader) *parser { @@ -41,21 +82,15 @@ func NewParser(r io.Reader) *parser { "sep": "September", "oct": "October", "nov": "November", "dec": "December", } - var name string - switch v := r.(type) { - case *os.File: - name = v.Name() - default: - name = "" - } res := make(map[string]Value, len(strings)) for key, s := range strings { res[key] = StringLiteral(s) } - db := &Database{Strings: res, Entries: make(map[string]Entry), - CrossRefs: make(map[string]int)} - return &parser{Reader: bufio.NewReader(r), lineno: 1, colno: 0, - fname: name, Database: db} + var preamble LiteralList + db := &Database{Strings: res, Entries: make(map[string]*Entry), + CrossRefs: make(map[string]int), + Preamble: preamble} + return &parser{scanner: NewScanner(r), Database: db} } func (p *parser) NewError(msg string) error { @@ -66,237 +101,129 @@ func (p *parser) Warning(msg string) { log.Printf("%s:%d:%d warning: %s", p.fname, p.lineno, p.colno, msg) } -func (p *parser) readUnsafe() rune { - ch, _, _ := p.ReadRune() - if ch == '\n' { - p.lineno += 1 - p.colno = 0 - } - p.colno += 1 - p.lastread = ch - return ch -} - -func (p *parser) read() rune { - ch, _, err := p.ReadRune() - if err == io.EOF { - panic(err) - } - if ch == '\n' { - p.lineno += 1 - p.colno = 0 - } - p.colno += 1 - p.lastread = ch - return ch -} - -func (p *parser) peek() rune { - ch, _, err := p.ReadRune() - p.UnreadRune() - if err == io.EOF { - return eof - } - return ch -} -func (p *parser) unread() { - p.colno -= 1 - if p.lastread == '\n' { - p.lineno -= 1 - } - p.UnreadRune() -} +const IllegalIdChars = "'{}()=\",#%" -func (p *parser) readToken(s string) string { - var buf bytes.Buffer - var ch rune - - for { - ch = p.read() - if strings.ContainsRune(s, ch) || unicode.IsSpace(ch) { - p.unread() - break - } else { - buf.WriteRune(ch) - } - } - return buf.String() -} - -const IllegalIdChars = "{}()=\",#%" - -func (p *parser) readIdentifier() string { - var buf bytes.Buffer - var ch rune - if ch = p.read(); unicode.IsDigit(ch) { - p.unread() - return "" - } else if strings.ContainsRune(IllegalIdChars, ch) || - unicode.IsSpace(ch) { - p.unread() - return "" +func (s *scanner) readIdentifier() { + if unicode.IsDigit(s.ch) { + s.buffer.Reset() + return } else { - buf.WriteRune(ch) + s.readToken(IllegalIdChars) } - - for { - ch = p.read() - if strings.ContainsRune(IllegalIdChars, ch) || unicode.IsSpace(ch) { - p.unread() - break - } else { - buf.WriteRune(ch) - } - } - return buf.String() } -func (p *parser) eatSpace() { - var ch rune - for { - ch = p.read() - if !unicode.IsSpace(ch) { - p.unread() - break - } +func (s *scanner) eatSpace() { + for ; unicode.IsSpace(s.ch); s.Next() { } } -func (p *parser) readBraceLiteral() BraceLiteral { - var buf bytes.Buffer - ch := p.read() - if ch != '{' { - p.unread() - panic(p.NewError("Expected '{'")) - } - blevel := 1 - for { - ch = p.read() - if ch == '{' { +func (s *scanner) readBraceLiteral() { + s.buffer.Reset() + s.Next() + blevel := 0 + for blevel > 0 || (blevel == 0 && s.ch != '}') { + if s.ch == '{' { blevel += 1 - } else if ch == '}' { + } else if s.ch == '}' { blevel -= 1 - if blevel == 0 { - break - } } - buf.WriteRune(ch) + s.buffer.WriteRune(s.ch) + s.Next() } - return BraceLiteral(buf.String()) + s.Next() } -func (p *parser) readStringLiteral() StringLiteral { - var buf bytes.Buffer - ch := p.read() - if ch != '"' { - p.unread() - panic(p.NewError("Expected '\"'")) - } +func (s *scanner) readStringLiteral() { + s.buffer.Reset() blevel := 0 - for blevel >= 0 { - ch = p.read() - if ch == '{' { + s.Next() + for blevel > 0 || (blevel == 0 && s.ch != '"') { + if s.ch == '{' { blevel += 1 - } else if ch == '}' { + } else if s.ch == '}' { blevel -= 1 - } else if ch == '"' && blevel == 0 { - break } - buf.WriteRune(ch) + s.buffer.WriteRune(s.ch) + s.Next() } - if blevel != 0 { - panic(p.NewError("Unbalanced '{'")) - } - return StringLiteral(buf.String()) + s.Next() } -func (p *parser) readNumber() NumberLiteral { - var buf bytes.Buffer - var ch rune - for { - ch = p.read() - if !unicode.IsDigit(ch) { - p.unread() - break - } else { - buf.WriteRune(ch) - } - } - n, err := strconv.Atoi(buf.String()) - if err != nil { - p.Warning(fmt.Sprintf("Couldn't parse number %q", buf.String())) +func (s *scanner) readNumber() { + s.buffer.Reset() + for unicode.IsDigit(s.ch) { + s.buffer.WriteRune(s.ch) + s.Next() } - return NumberLiteral(n) } func (p *parser) readLiteral() Value { - if ch := p.peek(); ch == '{' { - return p.readBraceLiteral() - } else if ch == '"' { - return p.readStringLiteral() - } else if unicode.IsDigit(ch) { - return p.readNumber() - } else { - id := strings.ToLower(p.readIdentifier()) + switch { + case p.ch == '{': + p.readBraceLiteral() + return BraceLiteral(p.Text()) + case p.ch == '"': + p.readStringLiteral() + return StringLiteral(p.Text()) + case unicode.IsDigit(p.ch): + p.readNumber() + n, err := strconv.Atoi(p.Text()) + if err != nil { + p.Warning(fmt.Sprintf("Couldn't parse number %q", p.Text())) + } + return NumberLiteral(n) + default: + p.readIdentifier() + id := p.Text() if id == "" { panic(p.NewError("Expected an identifier")) } - if v, in := p.Strings[id]; in { + if v, in := p.Strings[strings.ToLower(id)]; in { return VarLiteral{id, &v} } else { p.Warning(fmt.Sprintf("Unknown string %q", id)) - l := Value(StringLiteral("")) + l := Value(StringLiteral(id)) return VarLiteral{id, &l} } } } func (p *parser) readValue() Value { - var ch rune var res LiteralList res = append(res, p.readLiteral()) - for { + for p.eatSpace(); p.ch == '#'; p.eatSpace() { + p.Next() p.eatSpace() - if ch = p.read(); ch == '#' { - p.eatSpace() - res = append(res, p.readLiteral()) - } else { - p.unread() - break - } - } - if len(res) == 1 { - return res[0] - } else { - return res + res = append(res, p.readLiteral()) } + return res } func (p *parser) readIdValue() (string, Value) { - p.eatSpace() - id := p.readIdentifier() + p.readIdentifier() + id := p.Text() if id == "" { panic(p.NewError("Expected an identifier")) } p.eatSpace() - if ch := p.read(); ch != '=' { - p.unread() + if p.ch != '=' { panic(p.NewError("Expected '='")) } + p.Next() p.eatSpace() value := p.readValue() return id, value } func (p *parser) readOpen() rune { - p.eatSpace() - if ch := p.read(); ch == '(' { + if p.ch == '(' { + p.Next() return ')' - } else if ch == '{' { + } else if p.ch == '{' { + p.Next() return '}' } else { - p.unread() panic(p.NewError("Expected '(' or '{'")) } } @@ -307,9 +234,10 @@ func (p *parser) readPreamble() { prb := p.Preamble.(LiteralList) p.Preamble = append(prb, p.readValue()) p.eatSpace() - if ch := p.read(); ch != close { - p.unread() + if p.ch != close { panic(p.NewError(fmt.Sprintf("Expected %q", close))) + } else { + p.Next() } } @@ -319,15 +247,16 @@ func (p *parser) readString() { id, value := p.readIdValue() id = strings.ToLower(id) if _, in := p.Strings[id]; in { - p.Warning(fmt.Sprintf("String %q already defined, ignoring", id)) + p.Warning(fmt.Sprintf("String %q already defined, overwriting", id)) } else { - p.Strings[id] = value p.SNames = append(p.SNames, id) } + p.Strings[id] = value p.eatSpace() - if ch := p.read(); ch != close { - p.unread() + if p.ch != close { panic(p.NewError(fmt.Sprintf("Expected %q", close))) + } else { + p.Next() } } @@ -346,58 +275,55 @@ func (p *parser) checkCrossRef(id string, value Value) { func (p *parser) readEntry(t string) { close := p.readOpen() p.eatSpace() - var entry Entry - key := p.readToken("," + string(close)) - entry.Key = key - entry.Type = t - entry.Fields = make(map[string]Value) - key = strings.ToLower(key) + if close == ')' { + p.readToken(",") + } else { + p.readToken(",}") + } + key := strings.ToLower(p.Text()) + entry := &Entry{} store := true if _, in := p.Entries[key]; in { p.Warning(fmt.Sprintf("Entry %q already defined, ignoring", key)) store = false + } else { + entry.Key = key + entry.Type = t + entry.Fields = make(map[string]Value) + p.Entries[key] = entry + p.EKeys = append(p.EKeys, key) } - for { + for p.eatSpace(); p.ch != close; p.eatSpace() { + if p.ch != ',' { + panic(p.NewError(fmt.Sprintf("Expected ',' or %q", close))) + } + p.Next() p.eatSpace() - if ch := p.read(); ch == close { + if p.ch == close { break - } else if ch == ',' { - p.eatSpace() - if ch := p.read(); ch == close { - break - } else { - p.unread() - id, value := p.readIdValue() - id = strings.ToLower(id) - if _, in := entry.Fields[id]; in { - p.Warning(fmt.Sprintf("Field %q already defined, ignoring", - id)) - } else { - p.checkCrossRef(id, value) - entry.Fields[id] = value - entry.FNames = append(entry.FNames, id) - } - } - } else { - p.unread() - panic(p.NewError(fmt.Sprintf("Expected ',' or %q", close))) + } + id, value := p.readIdValue() + id = strings.ToLower(id) + if _, in := entry.Fields[id]; in { + p.Warning(fmt.Sprintf("Field %q already defined, ignoring", id)) + } else if store { + p.checkCrossRef(id, value) + entry.Fields[id] = value + entry.FNames = append(entry.FNames, id) } } - if store { - p.Entries[key] = entry - p.EKeys = append(p.EKeys, key) - } + p.Next() } -func (p *parser) readDeclaration() (err error) { - defer errorHandler(&err, p) - - t := p.readIdentifier() +func (p *parser) readDeclaration() { + p.readIdentifier() + p.eatSpace() + t := p.Text() if t == "" { - err = p.NewError("Expected entry type") + panic(p.NewError("Expected entry type")) } t = strings.ToLower(t) switch t { @@ -409,18 +335,21 @@ func (p *parser) readDeclaration() (err error) { default: p.readEntry(t) } - return } -func errorHandler(errp *error, p *parser) { +func errorHandler(err *error, p *parser, strict bool) { if e := recover(); e != nil { - switch e.(type) { + switch er := e.(type) { case ParseError: - *errp = e.(ParseError) - case error: - if e == io.EOF { - *errp = e.(error) + *err = er + if strict { + return } else { + log.Print(er.Error()) + p.parse(strict) + } + case error: + if e != io.EOF { panic(e) } default: @@ -429,27 +358,21 @@ func errorHandler(errp *error, p *parser) { } } -func Parse(r io.Reader, strict bool) (db *Database, err error) { - - p := NewParser(r) - var ch rune - +func (p *parser) parse(strict bool) (err error) { + defer errorHandler(&err, p, strict) for { - ch = p.readUnsafe() - switch ch { - case '@': + if p.ch == '@' { + p.Next() p.eatSpace() - err = p.readDeclaration() - if err != nil { - if strict { - return - } else { - log.Print(err.Error()) - } - } - case eof: - db = p.Database - return + p.readDeclaration() } + p.Next() } } + +func Parse(r io.Reader, strict bool) (db *Database, err error) { + p := NewParser(r) + err = p.parse(strict) + db = p.Database + return +} @@ -158,6 +158,8 @@ func squeezeSpaces(s string) string { return buf.String() } +var replacer = strings.NewReplacer("---", "—", "--", "–", "~", "\u00A0", "``", "“", "''", "”", "'", "’", "\"", "”") + func UnTex(s string) string { r := newReader(s) var buf bytes.Buffer @@ -183,8 +185,6 @@ func UnTex(s string) string { } s = string(norm.NFC.Bytes(buf.Bytes())) s = squeezeSpaces(s) - replacer := strings.NewReplacer("---", "—", "--", "–", "~", "\u00A0", - "``", "“", "''", "”", "'", "’", "\"", "”") return replacer.Replace(s) } |
