# Provenance: commit 871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7 by Thibaut Horel
# (Thu, 4 Feb 2016 19:46:04 -0500), "Start cleaning: PEP8 and split the
# BibTeX.py monster", which created utils.py.
"""Helpers for turning TeX/BibTeX strings into plain text, HTML and URLs.

The code is written to run unchanged on both Python 2 and Python 3.
"""

import os
import re

# String of the 256 single-byte characters.  No function in this module uses
# it any more; it is kept so that code importing it from here keeps working.
ALLCHARS = "".join(map(chr, range(256)))

# An ampersand followed by anything other than a lowercase letter or a digit.
RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
# The dotless-i command "\i" followed by a non-alphanumeric character.
RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
# A TeX accent command followed by either a bare character or "{x}".
# NOTE(review): the accent letter "c" (cedilla) also matches the start of
# commands such as "\cite"; left as-is to keep the pattern's groups stable.
RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
# A TeX ligature/special-letter command followed by a non-alphanumeric char.
RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')

# TeX accent character -> suffix of the matching HTML entity name.
ACCENT_MAP = {
    "'": 'acute',
    "`": 'grave',
    "~": 'tilde',
    "^": 'circ',
    '"': 'uml',
    "c": 'cedil',
}

# Entities built by _unaccent() that are replaced by a numeric reference.
# Keys must have the exact "&<char><accent>;" shape that _unaccent() produces.
UNICODE_MAP = {
    '&nacute;': '&#x0144;',
}

# TeX ligature command name -> HTML entity.  ASCII entities are used instead
# of raw characters so that the output does not depend on the page encoding
# and the source file needs no encoding declaration.
HTML_LIGATURE_MAP = {
    'AE': '&AElig;',
    'ae': '&aelig;',
    'OE': '&OElig;',
    'oe': '&oelig;',
    'AA': '&Aring;',
    'aa': '&aring;',
    'O': '&Oslash;',
    'o': '&oslash;',
    'ss': '&szlig;',
}

# Any TeX control sequence: a backslash plus letters/@, or plus one character.
RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
# A double dash between two digits (page ranges such as "12--34").
RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")


def _strip_braces(s):
    """Return s with every "{" and "}" removed."""
    # Replaces the Python 2-only call s.translate(ALLCHARS, "{}").
    return s.replace("{", "").replace("}", "")


def url_untranslate(s):
    """Change a BibTeX key into a string suitable for use in a URL.

    Each of the characters %<>`#, &_'; (including the space) is replaced by
    an underscore followed by its two-digit hexadecimal code, and every "/"
    is replaced by ":".
    """
    s = re.sub(r'([%<>`#, &_\';])', lambda m: "_%02x" % ord(m.group(1)), s)
    s = s.replace("/", ":")
    return s


def txtize(s):
    """Turn a TeX string into decent plaintext.

    Accent commands are reduced to the bare letter, ligature commands to
    their letters, every remaining TeX command is dropped and braces are
    removed.
    """
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
    s = RE_LIGATURE.sub(lambda m: "%s%s" % m.groups(), s)
    s = RE_TEX_CMD.sub("", s)
    s = _strip_braces(s)
    return s


def unTeXescapeURL(s):
    """Turn a URL as formatted in TeX into a real URL."""
    s = s.replace("\\_", "_")
    s = s.replace("\\-", "")
    s = s.replace("\\{}", "")
    s = s.replace("{}", "")
    return s


def TeXescapeURL(s):
    """Escape a URL for use in TeX."""
    s = s.replace("_", "\\_")
    s = s.replace("~", "\\{}~")
    return s


def _unaccent(m):
    """Return the HTML entity for a RE_ACCENT match.

    The entity is "&<char><accent-name>;", except when UNICODE_MAP provides
    a replacement for it.
    """
    accent, char = m.groups()
    if char[0] == '{':
        # "{x}" form: keep only the character inside the braces.
        char = char[1]
    accented = "&%s%s;" % (char, ACCENT_MAP[accent])
    return UNICODE_MAP.get(accented, accented)


def _unlig_html(m):
    """Return the HTML entity for a RE_LIGATURE match plus its trailing char."""
    return "%s%s" % (HTML_LIGATURE_MAP[m.group(1)], m.group(2))


def htmlize(s):
    """Turn a TeX string into good-looking HTML.

    Lone ampersands are escaped, accents and ligatures become HTML entities,
    remaining TeX commands and braces are dropped, and dashes are converted
    ("--" between digits to "-", "---" to an em dash, "--" to an en dash).
    """
    # Escape ampersands first, so the entities generated below stay intact.
    s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s)
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(_unaccent, s)
    s = unTeXescapeURL(s)
    s = RE_LIGATURE.sub(_unlig_html, s)
    s = RE_TEX_CMD.sub("", s)
    s = _strip_braces(s)
    s = RE_PAGE_SPAN.sub(lambda m: "%s-%s" % m.groups(), s)
    # "---" must be handled before "--", which is a prefix of it.
    s = s.replace("---", "&mdash;")
    s = s.replace("--", "&ndash;")
    return s


def smartJoin(*lst):
    """Equivalent to os.path.join, but handle "." and ".." entries a bit
    better.

    Every "." component is dropped, and so is every ".." component that is
    not the first remaining one.  A dropped ".." does not remove the
    component before it.  If no component remains, os.path.join() is called
    without arguments and raises TypeError.
    """
    parts = [item for item in lst if item != "."]
    parts = [item for pos, item in enumerate(parts)
             if pos == 0 or item != ".."]
    return os.path.join(*parts)


def _split(s, w=79, indent=8):
    """Wrap s into lines and return them joined by newlines.

    Runs of whitespace are collapsed to single spaces.  The first line is at
    most w characters wide; following lines are indented by `indent` spaces
    and limited to w - indent characters of text.  The result always ends
    with a newline unless s is empty.
    """
    r = []
    s = re.sub(r"\s+", " ", s)
    first = True
    indentation = ""
    while len(s) > w:
        # Break at the last space before column w, but never before
        # column 21.
        for i in range(w - 1, 20, -1):
            if s[i] == ' ':
                r.append(indentation + s[:i])
                s = s[i + 1:]
                break
        else:
            # No usable break point: emit the remainder as one long line.
            r.append(indentation + s.strip())
            s = ""
        if first:
            first = False
            w -= indent
            indentation = " " * indent
    if s:
        r.append(indentation + s)
    r.append("")
    return "\n".join(r)