summaryrefslogtreecommitdiffstats
path: root/utils.go
diff options
context:
space:
mode:
Diffstat (limited to 'utils.go')
-rw-r--r--utils.go105
1 files changed, 105 insertions, 0 deletions
diff --git a/utils.go b/utils.go
index e91ae44..3a1dd6a 100644
--- a/utils.go
+++ b/utils.go
@@ -4,8 +4,48 @@ import (
"bytes"
"strings"
"unicode"
+
+ "golang.org/x/text/unicode/norm"
)
// accents maps the name of a LaTeX accent command (the text that follows the
// backslash, e.g. "'" for \'e or "c" for \c{c}) to the Unicode combining mark
// that produces the accent when it is written after the base letter.
var accents = map[string]string{
	"'":  "\u0301", // acute
	"`":  "\u0300", // grave
	"^":  "\u0302", // circumflex
	"\"": "\u0308", // diaeresis
	"~":  "\u0303", // tilde
	"=":  "\u0304", // macron
	".":  "\u0307", // dot above
	"u":  "\u0306", // breve
	"v":  "\u030C", // caron
	"H":  "\u030B", // double acute
	"t":  "\u0361", // double inverted breve (tie)
	"c":  "\u0327", // cedilla
	"d":  "\u0323", // dot below
	"b":  "\u0331", // macron below
	"r":  "\u030A", // ring above
	"k":  "\u0328", // ogonek
}
+
// commands maps the name of a LaTeX command that stands for a single symbol
// (the text that follows the backslash) to the string that replaces it.
var commands = map[string]string{
	// Special letters and ligatures.
	"aa": "å",
	"AA": "Å",
	"oe": "œ",
	"OE": "Œ",
	"ae": "æ",
	"AE": "Æ",
	"ss": "ß",
	"o":  "ø",
	"O":  "Ø",
	"l":  "ł",
	"L":  "Ł",
	"i":  "ı", // dotless i
	"j":  "ȷ", // dotless j

	// Escaped characters: the command stands for the character itself.
	"$": "$",
	"{": "{",
	"}": "}",
	"_": "_",
	"#": "#",
	"%": "%",
	"&": "&",
}
+
// reader for LaTeX strings with methods to easily handle special chars, etc
type reader struct {
*strings.Reader
@@ -61,6 +101,71 @@ func (r *reader) peek() rune {
return ch
}
+func (r *reader) eatSpace() {
+ for c := r.readRune(); c != eof; c = r.readRune() {
+ if !unicode.IsSpace(c) {
+ r.UnreadRune()
+ break
+ }
+ }
+}
+
+func (r *reader) readLetter() (d rune) {
+ c := r.readRune()
+ switch c {
+ case '{':
+ blevel := 1
+ for c != eof {
+ c = r.readRune()
+ if c == '{' {
+ blevel += 1
+ } else if c == '}' {
+ blevel -= 1
+ if blevel == 0 {
+ break
+ }
+ } else {
+ if d == rune(0) {
+ d = c
+ }
+ }
+ }
+ default:
+ d = c
+ return
+ }
+ if d == rune(0) {
+ d = ' '
+ }
+ return
+}
+
+func removeLatexCommands(s string) string {
+ r := newReader(s)
+ var buf bytes.Buffer
+ for c := r.readRune(); c != eof; c = r.readRune() {
+ switch c {
+ case '\\':
+ command := r.readCommand()
+ if command == "" {
+ command = string(r.readRune())
+ }
+ if v, in := commands[command]; in {
+ buf.WriteString(v)
+ } else if v, in := accents[command]; in {
+ r.eatSpace()
+ l := r.readLetter()
+ buf.WriteRune(l)
+ buf.WriteString(v)
+ }
+ case '{', '}':
+ default:
+ buf.WriteRune(c)
+ }
+ }
+ return string(norm.NFC.Bytes(buf.Bytes()))
+}
+
// length of a LaTeX string. A special char counts as one, braces count as one
// to be consistent with the original BibTeX implementation, but this seems
// stupid