summaryrefslogtreecommitdiffstats
path: root/utils.go
diff options
context:
space:
mode:
Diffstat (limited to 'utils.go')
-rw-r--r--utils.go149
1 files changed, 149 insertions, 0 deletions
diff --git a/utils.go b/utils.go
new file mode 100644
index 0000000..1ca8298
--- /dev/null
+++ b/utils.go
@@ -0,0 +1,149 @@
+package bibtex
+
+import (
+ "bytes"
+ "strings"
+ "unicode"
+)
+
+// reader wraps a *strings.Reader over a LaTeX string. The methods defined on
+// it in this file read single runes and consume or scan brace-delimited
+// groups and backslash commands.
+type reader struct {
+ *strings.Reader
+}
+
+// newReader returns a reader positioned at the start of s.
+func newReader(s string) *reader {
+	inner := strings.NewReader(s)
+	return &reader{Reader: inner}
+}
+
+// readRune consumes and returns the next rune of the underlying string. The
+// size and error reported by strings.Reader.ReadRune are discarded, so once
+// the input is exhausted the caller gets the zero rune back.
+// NOTE(review): callers compare the result against eof, which is declared
+// outside this file; presumably it equals that zero rune -- confirm.
+func (r *reader) readRune() rune {
+	next, _, _ := r.Reader.ReadRune()
+	return next
+}
+
+// readBraces starts at nesting depth one (as if the opening '{' had just been
+// consumed) and reads until the depth drops to zero. It returns everything it
+// consumed: nested groups are copied verbatim and the final closing '}' is
+// part of the result. If eof is read first, the text gathered so far is
+// returned without the eof rune.
+func (r *reader) readBraces() string {
+	var out bytes.Buffer
+	for depth := 1; depth > 0; {
+		ch := r.readRune()
+		if ch == eof {
+			return out.String()
+		}
+		switch ch {
+		case '{':
+			depth++
+		case '}':
+			depth--
+		}
+		out.WriteRune(ch)
+	}
+	return out.String()
+}
+
+func (r *reader) skipBraces() {
+ bracelevel := 1
+ var c rune
+ for bracelevel > 0 {
+ c = r.readRune()
+ switch {
+ case c == '{':
+ bracelevel += 1
+ case c == '}':
+ bracelevel -= 1
+ case c == eof:
+ return
+ }
+ }
+}
+
+// peek returns the next rune without consuming it: one rune is read and the
+// underlying reader is then stepped back. The result of UnreadRune is ignored.
+func (r *reader) peek() rune {
+	// The deferred call runs after the rune has been read and captured as the
+	// return value, i.e. in the same read-then-unread order as before.
+	defer r.UnreadRune()
+	return r.readRune()
+}
+
+// length returns the length of the LaTeX string s. A special character, i.e.
+// a top-level brace group whose first rune is a backslash such as {\"o},
+// counts as one. Every other rune counts as one as well, braces included; the
+// original author notes this is meant to be consistent with the original
+// BibTeX implementation, while doubting that it is sensible.
+func length(s string) (res int) {
+	in := newReader(s)
+	depth := 0
+	for {
+		ch := in.readRune()
+		if ch == eof {
+			return res
+		}
+		switch ch {
+		case '{':
+			depth++
+			// Only a group opened at the top level can be a special char.
+			if depth == 1 && in.peek() == '\\' {
+				// Swallow the rest of the group so it is counted once.
+				in.skipBraces()
+				depth = 0
+			}
+		case '}':
+			depth--
+		}
+		res++
+	}
+}
+
+func (r *reader) searchBraces() (bool, bool) {
+ bracelevel := 1
+ var c rune
+ for bracelevel > 0 {
+ c = r.readRune()
+ switch {
+ case c == '{':
+ bracelevel += 1
+ case c == '}':
+ bracelevel -= 1
+ case unicode.IsLower(c):
+ return true, true
+ case unicode.IsUpper(c):
+ return false, true
+ case c == eof:
+ return false, false
+ }
+ }
+ return false, false
+}
+
+// readCommand consumes the longest run of letters at the current position and
+// returns it. The first non-letter rune is pushed back so the next read sees
+// it again; reading stops without a push-back when eof is read. The result is
+// empty if the next rune is not a letter.
+func (r *reader) readCommand() string {
+	var name bytes.Buffer
+	for {
+		ch := r.readRune()
+		if ch == eof {
+			break
+		}
+		if !unicode.IsLetter(ch) {
+			r.UnreadRune()
+			break
+		}
+		name.WriteRune(ch)
+	}
+	return name.String()
+}
+
+// isLower reports whether the first case-bearing element of t.Text is lower
+// case. The text is scanned from the start:
+//
+//   - a lower-case rune outside braces yields true, an upper-case one false;
+//   - a brace group that does not start with a backslash is skipped entirely;
+//   - for a group of the form {\command ...} the command name is looked up in
+//     lcommands (yielding true) and then ucommands (yielding false), both
+//     declared outside this file; if it is in neither, the first cased rune
+//     in the rest of the group decides, and if there is none scanning
+//     continues after the group.
+//
+// It returns false when the text ends without any deciding element.
+func (t *Token) isLower() bool {
+	in := newReader(t.Text)
+	for {
+		ch := in.readRune()
+		if ch == eof {
+			return false
+		}
+
+		// Plain rune outside any group: its case decides, if it has one.
+		if ch != '{' {
+			if unicode.IsLower(ch) {
+				return true
+			}
+			if unicode.IsUpper(ch) {
+				return false
+			}
+			continue
+		}
+
+		// Ordinary group: put the rune back and skip to the closing brace.
+		if in.readRune() != '\\' {
+			in.UnreadRune()
+			in.skipBraces()
+			continue
+		}
+
+		// Special character: a known command decides by itself.
+		name := in.readCommand()
+		if lcommands[name] {
+			return true
+		}
+		if ucommands[name] {
+			return false
+		}
+		if lower, found := in.searchBraces(); found {
+			return lower
+		}
+	}
+}