summaryrefslogtreecommitdiffstats
path: root/utils.go
diff options
context:
space:
mode:
Diffstat (limited to 'utils.go')
-rw-r--r--utils.go26
1 files changed, 24 insertions, 2 deletions
diff --git a/utils.go b/utils.go
index 3a1dd6a..d4c078f 100644
--- a/utils.go
+++ b/utils.go
@@ -140,7 +140,25 @@ func (r *reader) readLetter() (d rune) {
return
}
-func removeLatexCommands(s string) string {
+func squeezeSpaces(s string) string {
+ var buf bytes.Buffer
+ space := false
+ for _, c := range s {
+ switch c {
+ case ' ', '\n', '\t':
+ if !space {
+ buf.WriteRune(' ')
+ }
+ space = true
+ default:
+ buf.WriteRune(c)
+ space = false
+ }
+ }
+ return buf.String()
+}
+
+func UnTex(s string) string {
r := newReader(s)
var buf bytes.Buffer
for c := r.readRune(); c != eof; c = r.readRune() {
@@ -163,7 +181,11 @@ func removeLatexCommands(s string) string {
buf.WriteRune(c)
}
}
- return string(norm.NFC.Bytes(buf.Bytes()))
+ s = string(norm.NFC.Bytes(buf.Bytes()))
+ s = squeezeSpaces(s)
+ replacer := strings.NewReplacer("---", "—", "--", "–", "~", "\u00A0",
+ "``", "“", "''", "”", "'", "’", "\"", "”")
+ return replacer.Replace(s)
}
// length of a LaTeX string. A special char counts as one, braces count as one