From bd74996cf63f60e41df951e281410a1de32cb0b7 Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Sun, 21 Feb 2016 21:27:08 -0500 Subject: Add name parsing/formatting logic --- utils.go | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 utils.go (limited to 'utils.go') diff --git a/utils.go b/utils.go new file mode 100644 index 0000000..1ca8298 --- /dev/null +++ b/utils.go @@ -0,0 +1,149 @@ +package bibtex + +import ( + "bytes" + "strings" + "unicode" +) + +// reader for LaTeX strings with methods to easily handle special chars, etc +type reader struct { + *strings.Reader +} + +func newReader(s string) *reader { + return &reader{Reader: strings.NewReader(s)} +} + +func (r *reader) readRune() rune { + c, _, _ := r.Reader.ReadRune() + return c +} + +func (r *reader) readBraces() string { + bracelevel := 1 + var c rune + var buf bytes.Buffer + for bracelevel > 0 { + c = r.readRune() + switch { + case c == '{': + bracelevel += 1 + case c == '}': + bracelevel -= 1 + case c == eof: + return buf.String() + } + buf.WriteRune(c) + } + return buf.String() +} + +func (r *reader) skipBraces() { + bracelevel := 1 + var c rune + for bracelevel > 0 { + c = r.readRune() + switch { + case c == '{': + bracelevel += 1 + case c == '}': + bracelevel -= 1 + case c == eof: + return + } + } +} + +func (r *reader) peek() rune { + ch := r.readRune() + r.UnreadRune() + return ch +} + +// length of a LaTeX string. A special char counts as one, braces count as one +// to be consitent with the original BibTeX implementation, but this seems +// stupid +func length(s string) (res int) { + reader := newReader(s) + res = 0 + blevel := 0 + for c := reader.readRune(); c != eof; c = reader.readRune() { + switch { + case c == '{' && blevel == 0: + blevel += 1 + if c := reader.peek(); c == '\\' { + // special char, count as one + reader.skipBraces() + blevel = 0 + } + case c == '{': + blevel += 1 + case c == '}': + blevel -= 1 + } + res += 1 + } + return +} + +func (r *reader) searchBraces() (bool, bool) { + bracelevel := 1 + var c rune + for bracelevel > 0 { + c = r.readRune() + switch { + case c == '{': + bracelevel += 1 + case c == '}': + bracelevel -= 1 + case unicode.IsLower(c): + return true, true + case unicode.IsUpper(c): + return false, true + case c == eof: + return false, false + } + } + return false, false +} + +func (r *reader) readCommand() string { + var buf bytes.Buffer + for c := r.readRune(); c != eof; c = r.readRune() { + if !unicode.IsLetter(c) { + r.UnreadRune() + break + } else { + buf.WriteRune(c) + } + } + return buf.String() +} + +func (t *Token) isLower() bool { + reader := newReader(t.Text) + for c := reader.readRune(); c != eof; c = reader.readRune() { + switch { + case c == '{': + if c := reader.readRune(); c == '\\' { + command := reader.readCommand() + if lcommands[command] { + return true + } else if ucommands[command] { + return false + } else if a, b := reader.searchBraces(); b { + return a + } + } else { + reader.UnreadRune() + reader.skipBraces() + } + case unicode.IsLower(c): + return true + case unicode.IsUpper(c): + return false + } + } + return false +} -- cgit v1.2.3-70-g09d2