summary | refs | log | tree | commit | diff | stats
path: root/parser.go
diff options
context:
space:
mode:
author Thibaut Horel <thibaut.horel@gmail.com> 2019-06-21 13:16:47 -0400
committer Thibaut Horel <thibaut.horel@gmail.com> 2019-06-21 13:16:47 -0400
commit 84121b3bb4820945eeec3ad6084f24c37b015683 (patch)
tree d4aa970ce388eae8e1a41c4d94decedbc4f6f9f3 /parser.go
parent 58011496071a49e8f5f916a0416c36806ee94614 (diff)
download bibtex-master.tar.gz
Improving parser by factoring out a scanner (HEAD, master)
Diffstat (limited to 'parser.go')
-rw-r--r-- parser.go 435
1 files changed, 179 insertions, 256 deletions
diff --git a/parser.go b/parser.go
index 15e941d..2e3b985 100644
--- a/parser.go
+++ b/parser.go
@@ -14,6 +14,51 @@ import (
const eof = rune(0)
+type scanner struct {
+ *bufio.Reader
+ lineno int
+ colno int
+ ch rune
+ fname string
+ buffer bytes.Buffer
+}
+
+func NewScanner(r io.Reader) *scanner {
+ var name string
+ switch v := r.(type) {
+ case *os.File:
+ name = v.Name()
+ default:
+ name = ""
+ }
+ return &scanner{Reader: bufio.NewReader(r), lineno: 1, colno: 0,
+ fname: name}
+}
+
+func (s *scanner) Next() {
+ s.colno += 1
+ ch, _, err := s.ReadRune()
+ s.ch = ch
+ if err != nil {
+ panic(err)
+ }
+ if ch == '\n' {
+ s.lineno += 1
+ s.colno = 0
+ }
+}
+
+func (s *scanner) Text() string {
+ return s.buffer.String()
+}
+
+func (s *scanner) readToken(endChars string) {
+ s.buffer.Reset()
+ for !unicode.IsSpace(s.ch) && !strings.ContainsRune(endChars, s.ch) {
+ s.buffer.WriteRune(s.ch)
+ s.Next()
+ }
+}
type ParseError struct {
line int
col int
@@ -26,12 +71,8 @@ func (e ParseError) Error() string {
}
type parser struct {
- *bufio.Reader
*Database
- fname string
- lineno int
- colno int
- lastread rune
+ *scanner
}
func NewParser(r io.Reader) *parser {
@@ -41,21 +82,15 @@ func NewParser(r io.Reader) *parser {
"sep": "September", "oct": "October", "nov": "November",
"dec": "December",
}
- var name string
- switch v := r.(type) {
- case *os.File:
- name = v.Name()
- default:
- name = ""
- }
res := make(map[string]Value, len(strings))
for key, s := range strings {
res[key] = StringLiteral(s)
}
- db := &Database{Strings: res, Entries: make(map[string]Entry),
- CrossRefs: make(map[string]int)}
- return &parser{Reader: bufio.NewReader(r), lineno: 1, colno: 0,
- fname: name, Database: db}
+ var preamble LiteralList
+ db := &Database{Strings: res, Entries: make(map[string]*Entry),
+ CrossRefs: make(map[string]int),
+ Preamble: preamble}
+ return &parser{scanner: NewScanner(r), Database: db}
}
func (p *parser) NewError(msg string) error {
@@ -66,237 +101,129 @@ func (p *parser) Warning(msg string) {
log.Printf("%s:%d:%d warning: %s", p.fname, p.lineno, p.colno, msg)
}
-func (p *parser) readUnsafe() rune {
- ch, _, _ := p.ReadRune()
- if ch == '\n' {
- p.lineno += 1
- p.colno = 0
- }
- p.colno += 1
- p.lastread = ch
- return ch
-}
-
-func (p *parser) read() rune {
- ch, _, err := p.ReadRune()
- if err == io.EOF {
- panic(err)
- }
- if ch == '\n' {
- p.lineno += 1
- p.colno = 0
- }
- p.colno += 1
- p.lastread = ch
- return ch
-}
-
-func (p *parser) peek() rune {
- ch, _, err := p.ReadRune()
- p.UnreadRune()
- if err == io.EOF {
- return eof
- }
- return ch
-}
-func (p *parser) unread() {
- p.colno -= 1
- if p.lastread == '\n' {
- p.lineno -= 1
- }
- p.UnreadRune()
-}
+const IllegalIdChars = "'{}()=\",#%"
-func (p *parser) readToken(s string) string {
- var buf bytes.Buffer
- var ch rune
-
- for {
- ch = p.read()
- if strings.ContainsRune(s, ch) || unicode.IsSpace(ch) {
- p.unread()
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- return buf.String()
-}
-
-const IllegalIdChars = "{}()=\",#%"
-
-func (p *parser) readIdentifier() string {
- var buf bytes.Buffer
- var ch rune
- if ch = p.read(); unicode.IsDigit(ch) {
- p.unread()
- return ""
- } else if strings.ContainsRune(IllegalIdChars, ch) ||
- unicode.IsSpace(ch) {
- p.unread()
- return ""
+func (s *scanner) readIdentifier() {
+ if unicode.IsDigit(s.ch) {
+ s.buffer.Reset()
+ return
} else {
- buf.WriteRune(ch)
+ s.readToken(IllegalIdChars)
}
-
- for {
- ch = p.read()
- if strings.ContainsRune(IllegalIdChars, ch) || unicode.IsSpace(ch) {
- p.unread()
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- return buf.String()
}
-func (p *parser) eatSpace() {
- var ch rune
- for {
- ch = p.read()
- if !unicode.IsSpace(ch) {
- p.unread()
- break
- }
+func (s *scanner) eatSpace() {
+ for ; unicode.IsSpace(s.ch); s.Next() {
}
}
-func (p *parser) readBraceLiteral() BraceLiteral {
- var buf bytes.Buffer
- ch := p.read()
- if ch != '{' {
- p.unread()
- panic(p.NewError("Expected '{'"))
- }
- blevel := 1
- for {
- ch = p.read()
- if ch == '{' {
+func (s *scanner) readBraceLiteral() {
+ s.buffer.Reset()
+ s.Next()
+ blevel := 0
+ for blevel > 0 || (blevel == 0 && s.ch != '}') {
+ if s.ch == '{' {
blevel += 1
- } else if ch == '}' {
+ } else if s.ch == '}' {
blevel -= 1
- if blevel == 0 {
- break
- }
}
- buf.WriteRune(ch)
+ s.buffer.WriteRune(s.ch)
+ s.Next()
}
- return BraceLiteral(buf.String())
+ s.Next()
}
-func (p *parser) readStringLiteral() StringLiteral {
- var buf bytes.Buffer
- ch := p.read()
- if ch != '"' {
- p.unread()
- panic(p.NewError("Expected '\"'"))
- }
+func (s *scanner) readStringLiteral() {
+ s.buffer.Reset()
blevel := 0
- for blevel >= 0 {
- ch = p.read()
- if ch == '{' {
+ s.Next()
+ for blevel > 0 || (blevel == 0 && s.ch != '"') {
+ if s.ch == '{' {
blevel += 1
- } else if ch == '}' {
+ } else if s.ch == '}' {
blevel -= 1
- } else if ch == '"' && blevel == 0 {
- break
}
- buf.WriteRune(ch)
+ s.buffer.WriteRune(s.ch)
+ s.Next()
}
- if blevel != 0 {
- panic(p.NewError("Unbalanced '{'"))
- }
- return StringLiteral(buf.String())
+ s.Next()
}
-func (p *parser) readNumber() NumberLiteral {
- var buf bytes.Buffer
- var ch rune
- for {
- ch = p.read()
- if !unicode.IsDigit(ch) {
- p.unread()
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- n, err := strconv.Atoi(buf.String())
- if err != nil {
- p.Warning(fmt.Sprintf("Couldn't parse number %q", buf.String()))
+func (s *scanner) readNumber() {
+ s.buffer.Reset()
+ for unicode.IsDigit(s.ch) {
+ s.buffer.WriteRune(s.ch)
+ s.Next()
}
- return NumberLiteral(n)
}
func (p *parser) readLiteral() Value {
- if ch := p.peek(); ch == '{' {
- return p.readBraceLiteral()
- } else if ch == '"' {
- return p.readStringLiteral()
- } else if unicode.IsDigit(ch) {
- return p.readNumber()
- } else {
- id := strings.ToLower(p.readIdentifier())
+ switch {
+ case p.ch == '{':
+ p.readBraceLiteral()
+ return BraceLiteral(p.Text())
+ case p.ch == '"':
+ p.readStringLiteral()
+ return StringLiteral(p.Text())
+ case unicode.IsDigit(p.ch):
+ p.readNumber()
+ n, err := strconv.Atoi(p.Text())
+ if err != nil {
+ p.Warning(fmt.Sprintf("Couldn't parse number %q", p.Text()))
+ }
+ return NumberLiteral(n)
+ default:
+ p.readIdentifier()
+ id := p.Text()
if id == "" {
panic(p.NewError("Expected an identifier"))
}
- if v, in := p.Strings[id]; in {
+ if v, in := p.Strings[strings.ToLower(id)]; in {
return VarLiteral{id, &v}
} else {
p.Warning(fmt.Sprintf("Unknown string %q", id))
- l := Value(StringLiteral(""))
+ l := Value(StringLiteral(id))
return VarLiteral{id, &l}
}
}
}
func (p *parser) readValue() Value {
- var ch rune
var res LiteralList
res = append(res, p.readLiteral())
- for {
+ for p.eatSpace(); p.ch == '#'; p.eatSpace() {
+ p.Next()
p.eatSpace()
- if ch = p.read(); ch == '#' {
- p.eatSpace()
- res = append(res, p.readLiteral())
- } else {
- p.unread()
- break
- }
- }
- if len(res) == 1 {
- return res[0]
- } else {
- return res
+ res = append(res, p.readLiteral())
}
+ return res
}
func (p *parser) readIdValue() (string, Value) {
- p.eatSpace()
- id := p.readIdentifier()
+ p.readIdentifier()
+ id := p.Text()
if id == "" {
panic(p.NewError("Expected an identifier"))
}
p.eatSpace()
- if ch := p.read(); ch != '=' {
- p.unread()
+ if p.ch != '=' {
panic(p.NewError("Expected '='"))
}
+ p.Next()
p.eatSpace()
value := p.readValue()
return id, value
}
func (p *parser) readOpen() rune {
- p.eatSpace()
- if ch := p.read(); ch == '(' {
+ if p.ch == '(' {
+ p.Next()
return ')'
- } else if ch == '{' {
+ } else if p.ch == '{' {
+ p.Next()
return '}'
} else {
- p.unread()
panic(p.NewError("Expected '(' or '{'"))
}
}
@@ -307,9 +234,10 @@ func (p *parser) readPreamble() {
prb := p.Preamble.(LiteralList)
p.Preamble = append(prb, p.readValue())
p.eatSpace()
- if ch := p.read(); ch != close {
- p.unread()
+ if p.ch != close {
panic(p.NewError(fmt.Sprintf("Expected %q", close)))
+ } else {
+ p.Next()
}
}
@@ -319,15 +247,16 @@ func (p *parser) readString() {
id, value := p.readIdValue()
id = strings.ToLower(id)
if _, in := p.Strings[id]; in {
- p.Warning(fmt.Sprintf("String %q already defined, ignoring", id))
+ p.Warning(fmt.Sprintf("String %q already defined, overwriting", id))
} else {
- p.Strings[id] = value
p.SNames = append(p.SNames, id)
}
+ p.Strings[id] = value
p.eatSpace()
- if ch := p.read(); ch != close {
- p.unread()
+ if p.ch != close {
panic(p.NewError(fmt.Sprintf("Expected %q", close)))
+ } else {
+ p.Next()
}
}
@@ -346,58 +275,55 @@ func (p *parser) checkCrossRef(id string, value Value) {
func (p *parser) readEntry(t string) {
close := p.readOpen()
p.eatSpace()
- var entry Entry
- key := p.readToken("," + string(close))
- entry.Key = key
- entry.Type = t
- entry.Fields = make(map[string]Value)
- key = strings.ToLower(key)
+ if close == ')' {
+ p.readToken(",")
+ } else {
+ p.readToken(",}")
+ }
+ key := strings.ToLower(p.Text())
+ entry := &Entry{}
store := true
if _, in := p.Entries[key]; in {
p.Warning(fmt.Sprintf("Entry %q already defined, ignoring", key))
store = false
+ } else {
+ entry.Key = key
+ entry.Type = t
+ entry.Fields = make(map[string]Value)
+ p.Entries[key] = entry
+ p.EKeys = append(p.EKeys, key)
}
- for {
+ for p.eatSpace(); p.ch != close; p.eatSpace() {
+ if p.ch != ',' {
+ panic(p.NewError(fmt.Sprintf("Expected ',' or %q", close)))
+ }
+ p.Next()
p.eatSpace()
- if ch := p.read(); ch == close {
+ if p.ch == close {
break
- } else if ch == ',' {
- p.eatSpace()
- if ch := p.read(); ch == close {
- break
- } else {
- p.unread()
- id, value := p.readIdValue()
- id = strings.ToLower(id)
- if _, in := entry.Fields[id]; in {
- p.Warning(fmt.Sprintf("Field %q already defined, ignoring",
- id))
- } else {
- p.checkCrossRef(id, value)
- entry.Fields[id] = value
- entry.FNames = append(entry.FNames, id)
- }
- }
- } else {
- p.unread()
- panic(p.NewError(fmt.Sprintf("Expected ',' or %q", close)))
+ }
+ id, value := p.readIdValue()
+ id = strings.ToLower(id)
+ if _, in := entry.Fields[id]; in {
+ p.Warning(fmt.Sprintf("Field %q already defined, ignoring", id))
+ } else if store {
+ p.checkCrossRef(id, value)
+ entry.Fields[id] = value
+ entry.FNames = append(entry.FNames, id)
}
}
- if store {
- p.Entries[key] = entry
- p.EKeys = append(p.EKeys, key)
- }
+ p.Next()
}
-func (p *parser) readDeclaration() (err error) {
- defer errorHandler(&err, p)
-
- t := p.readIdentifier()
+func (p *parser) readDeclaration() {
+ p.readIdentifier()
+ p.eatSpace()
+ t := p.Text()
if t == "" {
- err = p.NewError("Expected entry type")
+ panic(p.NewError("Expected entry type"))
}
t = strings.ToLower(t)
switch t {
@@ -409,18 +335,21 @@ func (p *parser) readDeclaration() (err error) {
default:
p.readEntry(t)
}
- return
}
-func errorHandler(errp *error, p *parser) {
+func errorHandler(err *error, p *parser, strict bool) {
if e := recover(); e != nil {
- switch e.(type) {
+ switch er := e.(type) {
case ParseError:
- *errp = e.(ParseError)
- case error:
- if e == io.EOF {
- *errp = e.(error)
+ *err = er
+ if strict {
+ return
} else {
+ log.Print(er.Error())
+ p.parse(strict)
+ }
+ case error:
+ if e != io.EOF {
panic(e)
}
default:
@@ -429,27 +358,21 @@ func errorHandler(errp *error, p *parser) {
}
}
-func Parse(r io.Reader, strict bool) (db *Database, err error) {
-
- p := NewParser(r)
- var ch rune
-
+func (p *parser) parse(strict bool) (err error) {
+ defer errorHandler(&err, p, strict)
for {
- ch = p.readUnsafe()
- switch ch {
- case '@':
+ if p.ch == '@' {
+ p.Next()
p.eatSpace()
- err = p.readDeclaration()
- if err != nil {
- if strict {
- return
- } else {
- log.Print(err.Error())
- }
- }
- case eof:
- db = p.Database
- return
+ p.readDeclaration()
}
+ p.Next()
}
}
+
+func Parse(r io.Reader, strict bool) (db *Database, err error) {
+ p := NewParser(r)
+ err = p.parse(strict)
+ db = p.Database
+ return
+}