diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2016-02-04 19:46:04 -0500 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2016-02-04 19:46:04 -0500 |
| commit | 871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7 (patch) | |
| tree | 99bce3e74cbcff075dcb6bceacd0f2e1133bef4d /utils.py | |
| parent | fd20589a448cd19d036f18cabb1663c33a24375d (diff) | |
| download | anonbib-871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7.tar.gz | |
Start cleaning: PEP8 and split the BibTeX.py monster
Diffstat (limited to 'utils.py')
| -rw-r--r-- | utils.py | 118 |
1 files changed, 118 insertions, 0 deletions
# diff --git a/utils.py b/utils.py (new file mode 100644, index 0000000..4d4b583)
"""String helpers for converting TeX/BibTeX text to URLs, plain text and HTML.

The code is written so that it runs unchanged on Python 2 and Python 3.
"""

import os
import re

# No longer used inside this module (it was the identity table for the
# Python 2 only ``str.translate(table, deletechars)`` call); kept because
# other modules may still import it.
ALLCHARS = "".join(map(chr, range(256)))

# An ampersand that does not start something looking like an entity name.
RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
# The TeX dotless-i command, followed by a non-alphanumeric character.
RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
# A TeX accent command followed by either a bare character or "{x}".
RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
# A TeX ligature/special-letter command and the character that ends it.
RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')

# TeX accent command -> suffix of the matching HTML entity name.
ACCENT_MAP = {
    "'": 'acute',
    "`": 'grave',
    "~": 'tilde',
    "^": 'circ',
    '"': 'uml',
    "c": 'cedil',
}

# Entities built by _unaccent() that are emitted as a numeric character
# reference instead of by name.
UNICODE_MAP = {
    '&nacute;': '&#x0144;',
}

# TeX ligature command -> HTML entity.
HTML_LIGATURE_MAP = {
    'AE': '&AElig;',
    'ae': '&aelig;',
    'OE': '&OElig;',
    'oe': '&oelig;',
    'AA': '&Aring;',
    'aa': '&aring;',
    'O': '&Oslash;',
    'o': '&oslash;',
    'ss': '&szlig;',
}

# Any TeX control word (\foo) or control symbol (\%).
RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
# A page range written the TeX way: digit, "--", digit.
RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")

# Characters that url_untranslate() replaces with "_" + two hex digits.
_RE_URL_UNSAFE = re.compile(r'([%<>`#, &_\';])')


def _strip_braces(s):
    """Return *s* with every "{" and "}" removed."""
    return s.replace("{", "").replace("}", "")


def url_untranslate(s):
    """Change a BibTeX key into a string suitable for use in a URL.

    Each of the characters ``%<>`#, &_';`` becomes ``_`` followed by its
    two-digit lowercase hex code, and every ``/`` becomes ``:``.
    """
    s = _RE_URL_UNSAFE.sub(lambda m: "_%02x" % ord(m.group(1)), s)
    return s.replace("/", ":")


def txtize(s):
    """Turn a TeX string into decent plaintext.

    Accents are dropped (the base letter is kept), ligature commands lose
    their backslash, remaining TeX commands are deleted and braces are
    stripped.
    """
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
    s = RE_LIGATURE.sub(lambda m: "%s%s" % m.groups(), s)
    s = RE_TEX_CMD.sub("", s)
    return _strip_braces(s)


def unTeXescapeURL(s):
    """Turn a URL as formatted in TeX into a real URL.

    Undoes ``\\_`` escaping and removes ``\\-``, ``\\{}`` and ``{}``.
    """
    s = s.replace("\\_", "_")
    s = s.replace("\\-", "")
    # "\\{}" must be removed before the bare "{}", otherwise a stray
    # backslash would be left behind.
    s = s.replace("\\{}", "")
    s = s.replace("{}", "")
    return s


def TeXescapeURL(s):
    """Escape a URL for use in TeX.

    ``_`` becomes ``\\_`` and ``~`` becomes ``\\{}~``.
    """
    s = s.replace("_", "\\_")
    s = s.replace("~", "\\{}~")
    return s


def _unaccent(m):
    """Map a RE_ACCENT match to the corresponding HTML entity."""
    accent, char = m.groups()
    if char[0] == '{':
        # Matched the "{x}" form: keep only the character inside the braces.
        char = char[1]
    accented = "&%s%s;" % (char, ACCENT_MAP[accent])
    return UNICODE_MAP.get(accented, accented)


def _unlig_html(m):
    """Map a RE_LIGATURE match to its entity plus the terminating character."""
    return "%s%s" % (HTML_LIGATURE_MAP[m.group(1)], m.group(2))


def htmlize(s):
    """Turn a TeX string into good-looking HTML.

    Lone ampersands are escaped, accents and ligatures become HTML
    entities, TeX URL escapes are undone, remaining TeX commands and braces
    are removed, and TeX dashes are converted ("1--2" to "1-2", "---" to
    an em dash entity, "--" to an en dash entity).
    """
    s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s)
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(_unaccent, s)
    s = unTeXescapeURL(s)
    s = RE_LIGATURE.sub(_unlig_html, s)
    s = RE_TEX_CMD.sub("", s)
    s = _strip_braces(s)
    # Page spans first, so that "12--34" is not turned into an en dash.
    s = RE_PAGE_SPAN.sub(lambda m: "%s-%s" % m.groups(), s)
    # "---" before "--", so that an em dash is not half-consumed.
    s = s.replace("---", "&mdash;")
    s = s.replace("--", "&ndash;")
    return s


def smartJoin(*lst):
    """Equivalent to os.path.join, but handle "." and ".." entries a bit better.

    Every "." component is dropped, and every ".." component other than
    the first component is dropped as well; what remains is passed to
    os.path.join.  Raises TypeError (from os.path.join) when no component
    is left.

    NOTE(review): a non-leading ".." is removed without also removing the
    component before it, so smartJoin("a", "..", "b") gives "a/b".  Kept
    as-is because callers may rely on it -- confirm whether it is intended.
    """
    parts = [item for item in lst if item != "."]
    idx = 0
    while idx < len(parts):
        if idx > 0 and parts[idx] == "..":
            del parts[idx]
        else:
            idx += 1
    return os.path.join(*parts)


def _split(s, w=79, indent=8):
    """Re-wrap *s* into lines of at most *w* columns.

    Runs of whitespace are collapsed to single spaces.  The first line is
    not indented; every following line is indented by *indent* spaces and
    is correspondingly narrower.  Lines are broken at the last space found
    between column 21 and column w-1; if there is none, the remainder of
    the text is emitted as one over-long line.  The result always ends with
    a newline.
    """
    lines = []
    s = re.sub(r"\s+", " ", s)
    first = True
    indentation = ""
    while len(s) > w:
        for i in range(w - 1, 20, -1):
            if s[i] == ' ':
                lines.append(indentation + s[:i])
                s = s[i + 1:]
                break
        else:
            # No usable break point: give up wrapping the rest.
            lines.append(indentation + s.strip())
            s = ""
        if first:
            # Continuation lines are indented, so they have less room.
            first = False
            w -= indent
            indentation = " " * indent
    if s:
        lines.append(indentation + s)
    lines.append("")
    return "\n".join(lines)
