diff options
Diffstat (limited to 'utils.go')
| -rw-r--r-- | utils.go | 26 |
1 file changed, 24 insertions, 2 deletions
@@ -140,7 +140,25 @@ func (r *reader) readLetter() (d rune) { return } -func removeLatexCommands(s string) string { +func squeezeSpaces(s string) string { + var buf bytes.Buffer + space := false + for _, c := range s { + switch c { + case ' ', '\n', '\t': + if !space { + buf.WriteRune(' ') + } + space = true + default: + buf.WriteRune(c) + space = false + } + } + return buf.String() +} + +func UnTex(s string) string { r := newReader(s) var buf bytes.Buffer for c := r.readRune(); c != eof; c = r.readRune() { @@ -163,7 +181,11 @@ func removeLatexCommands(s string) string { buf.WriteRune(c) } } - return string(norm.NFC.Bytes(buf.Bytes())) + s = string(norm.NFC.Bytes(buf.Bytes())) + s = squeezeSpaces(s) + replacer := strings.NewReplacer("---", "—", "--", "–", "~", "\u00A0", + "``", "“", "''", "”", "'", "’", "\"", "”") + return replacer.Replace(s) } // length of a LaTeX string. A special char counts as one, braces count as one |
