Start cleaning: PEP8 and split the BibTeX.py monster

author: Thibaut Horel <thibaut.horel@gmail.com> 2016-02-04 19:46:04 -0500
committer: Thibaut Horel <thibaut.horel@gmail.com> 2016-02-04 19:46:04 -0500
commit: 871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7 (patch)
tree: 99bce3e74cbcff075dcb6bceacd0f2e1133bef4d /utils.py
parent: fd20589a448cd19d036f18cabb1663c33a24375d (diff)
download: anonbib-871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7.tar.gz
1 files changed, 118 insertions, 0 deletions
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..4d4b583
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,118 @@
+import re
+import os
+
+ALLCHARS = "".join(map(chr,range(256)))
+RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
+RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
+RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
+RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')
+ACCENT_MAP = { "'" : 'acute',
+               "`" : 'grave',
+               "~" : 'tilde',
+               "^" : 'circ',
+               '"' : 'uml',
+               "c" : 'cedil',
+               }
+
+UNICODE_MAP = { '&nacute;' : '&#x0144;', }
+HTML_LIGATURE_MAP = {
+    'AE' : '&AElig;',
+    'ae' : '&aelig;',
+    'OE' : '&OElig;',
+    'oe' : '&oelig;',
+    'AA' : '&Aring;',
+    'aa' : '&aring;',
+    'O'  : '&Oslash;',
+    'o'  : '&oslash;',
+    'ss' : '&szlig;',
+    }
+RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
+RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")
+
+def url_untranslate(s):
+    """Change a BibTeX key into a string suitable for use in a URL."""
+    s = re.sub(r'([%<>`#, &_\';])', lambda m: "_%02x" % ord(m.group(1)), s)
+    s = s.replace("/", ":")
+    return s
+
+def txtize(s):
+    """Turn a TeX string into decent plaintext."""
+    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
+    s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
+    s = RE_LIGATURE.sub(lambda m: "%s%s"%m.groups(), s)
+    s = RE_TEX_CMD.sub("", s)
+    s = s.translate(ALLCHARS, "{}")
+    return s
+
+def unTeXescapeURL(s):
+    """Turn a URL as formatted in TeX into a real URL."""
+    s = s.replace("\\_", "_")
+    s = s.replace("\\-", "")
+    s = s.replace("\{}", "")
+    s = s.replace("{}", "")
+    return s
+
+def TeXescapeURL(s):
+    """Escape a URL for use in TeX"""
+    s = s.replace("_", "\\_")
+    s = s.replace("~", "\{}~")
+    return s
+
+def _unaccent(m):
+    accent,char = m.groups()
+    if char[0] == '{':
+        char = char[1]
+    accented = "&%s%s;" % (char, ACCENT_MAP[accent])
+    return UNICODE_MAP.get(accented, accented)
+
+def _unlig_html(m):
+    return "%s%s"%(HTML_LIGATURE_MAP[m.group(1)],m.group(2))
+
+def htmlize(s):
+    """Turn a TeX string into good-looking HTML."""
+    s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s)
+    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
+    s = RE_ACCENT.sub(_unaccent, s)
+    s = unTeXescapeURL(s)
+    s = RE_LIGATURE.sub(_unlig_html, s);
+    s = RE_TEX_CMD.sub("", s)
+    s = s.translate(ALLCHARS, "{}")
+    s = RE_PAGE_SPAN.sub(lambda m: "%s-%s"%(m.groups()), s)
+    s = s.replace("---", "&mdash;");
+    s = s.replace("--", "&ndash;");
+    return s
+
+def smartJoin(*lst):
+    """Equivalent to os.path.join, but handle "." and ".." entries a bit better.
+    """
+    lst = [item for item in lst if item != "."]
+    idx = 0
+    while idx < len(lst):
+        if idx > 0 and lst[idx] == "..":
+            del lst[idx]
+        else:
+            idx += 1
+    return os.path.join(*lst)
+
+def _split(s,w=79,indent=8):
+    r = []
+    s = re.sub(r"\s+", " ", s)
+    first = 1
+    indentation = ""
+    while len(s) > w:
+        for i in xrange(w-1, 20, -1):
+            if s[i] == ' ':
+                r.append(indentation+s[:i])
+                s = s[i+1:]
+                break
+        else:
+            r.append(indentation+s.strip())
+            s = ""
+        if first:
+            first = 0
+            w -= indent
+            indentation = " "*indent
+    if (s):
+        r.append(indentation+s)
+    r.append("")
+    return "\n".join(r)
author	Thibaut Horel <thibaut.horel@gmail.com>	2016-02-04 19:46:04 -0500
committer	Thibaut Horel <thibaut.horel@gmail.com>	2016-02-04 19:46:04 -0500
commit	871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7 (patch)
tree	99bce3e74cbcff075dcb6bceacd0f2e1133bef4d /utils.py
parent	fd20589a448cd19d036f18cabb1663c33a24375d (diff)
download	anonbib-871c61c6b4351d4a9dd78ba1d70d6e1af8ffe1e7.tar.gz