aboutsummaryrefslogtreecommitdiffstats
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rw-r--r--utils.py118
1 files changed, 118 insertions, 0 deletions
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..4d4b583
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,118 @@
+import re
+import os
+
# Identity translation table covering character codes 0-255.  Kept for code
# that passes it to the two-argument (Python 2) form of str.translate.
ALLCHARS = "".join(map(chr, range(256)))

# "&" followed by one character that is not a lowercase letter or digit
# (the following character is captured so it can be re-emitted).
RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
# The TeX command "\i" followed by one non-[a-z0-9] character (captured).
RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
# A TeX accent command: backslash, one of ' ` ~ ^ " c (group 1), then either
# a single character other than "{" or a single braced character (group 2).
RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
# A TeX ligature/special-letter command (group 1) followed by one
# non-[a-z0-9] character (group 2).
RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')

# TeX accent command character -> suffix of the matching HTML entity name
# (e.g. "'" + "e" gives "&eacute;").
ACCENT_MAP = {
    "'": 'acute',
    "`": 'grave',
    "~": 'tilde',
    "^": 'circ',
    '"': 'uml',
    "c": 'cedil',
}

# Overrides applied to the "&<letter><accent>;" string built from ACCENT_MAP.
# Keys must therefore be spelled as entities; the value is what is emitted
# instead (a numeric character reference for U+0144, n with acute).
UNICODE_MAP = {
    '&nacute;': '&#x0144;',
}

# TeX ligature command name -> HTML entity.  Entities are used instead of
# literal non-ASCII characters so this source file stays pure ASCII and the
# generated HTML does not depend on the output encoding.
HTML_LIGATURE_MAP = {
    'AE': '&AElig;',
    'ae': '&aelig;',
    'OE': '&OElig;',
    'oe': '&oelig;',
    'AA': '&Aring;',
    'aa': '&aring;',
    'O': '&Oslash;',
    'o': '&oslash;',
    'ss': '&szlig;',
}

# Any TeX command: a backslash followed by a run of letters/"@", or a
# backslash followed by any single character.
RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
# A "--" sitting directly between two digits (each digit is captured).
RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")
+
# The characters url_untranslate() escapes.  Precompiled at module level
# like the other patterns in this file.
_RE_URL_ESCAPED = re.compile(r'([%<>`#, &_\';])')


def url_untranslate(s):
    """Change a BibTeX key into a string suitable for use in a URL.

    Each of the characters % < > ` # , space & _ ' ; is replaced by an
    underscore followed by its character code as two lowercase hex digits
    (so "_" itself becomes "_5f").  After that, every "/" is replaced by
    ":".  All other characters are passed through unchanged.
    """
    s = _RE_URL_ESCAPED.sub(lambda m: "_%02x" % ord(m.group(1)), s)
    return s.replace("/", ":")
+
def txtize(s):
    """Turn a TeX string into decent plaintext.

    The steps run in this order:

    1. "\\i" (dotless i) followed by a non-[a-z0-9] character becomes "i"
       plus that character.
    2. Accent commands are dropped, keeping only their argument.
    3. Ligature commands lose their backslash (e.g. "\\ae " -> "ae ").
    4. Every remaining TeX command is removed.
    5. All "{" and "}" characters are removed.

    Returns the converted string.
    """
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
    s = RE_LIGATURE.sub(lambda m: "%s%s" % m.groups(), s)
    s = RE_TEX_CMD.sub("", s)
    # Strip braces with str.replace: the two-argument translate(table,
    # deletechars) form only exists on Python 2 byte strings and raises
    # TypeError for Python 2 unicode and Python 3 str.
    s = s.replace("{", "").replace("}", "")
    return s
+
def unTeXescapeURL(s):
    """Turn a URL as formatted in TeX into a real URL.

    Undoes the TeX-level decoration of a URL: "\\_" becomes "_", the
    discretionary hyphen "\\-" is dropped, and both "\\{}" and a bare "{}"
    are dropped.  Returns the cleaned-up string.
    """
    s = s.replace("\\_", "_")
    s = s.replace("\\-", "")
    # Written as "\\{}" (same value as the former "\{}") because "\{" is
    # not a valid escape sequence and newer Python versions warn about it.
    s = s.replace("\\{}", "")
    s = s.replace("{}", "")
    return s
+
def TeXescapeURL(s):
    """Escape a URL for use in TeX.

    Every "_" is replaced by "\\_" and every "~" is prefixed with "\\{}".
    Returns the escaped string.
    """
    s = s.replace("_", "\\_")
    # Written as "\\{}~" (same value as the former "\{}~") because "\{" is
    # not a valid escape sequence and newer Python versions warn about it.
    s = s.replace("~", "\\{}~")
    return s
+
def _unaccent(m):
    """Build the HTML entity for one accent-command regex match.

    *m* must have two groups: the accent character (a key of ACCENT_MAP)
    and the accented argument, which is either a single character or a
    single character wrapped in braces.  The result is the string
    "&<char><accent-name>;", unless UNICODE_MAP holds a replacement for
    exactly that string, in which case the replacement is returned.
    """
    accent, target = m.groups()
    # A braced argument such as "{e}" contributes only the character inside.
    letter = target[1] if target[0] == '{' else target
    entity = "&%s%s;" % (letter, ACCENT_MAP[accent])
    return UNICODE_MAP.get(entity, entity)
+
def _unlig_html(m):
    """Return the HTML replacement for one ligature-command regex match.

    Group 1 of *m* (the command name) is looked up in HTML_LIGATURE_MAP;
    group 2 (the character that followed the command) is appended to the
    result unchanged.
    """
    ligature = HTML_LIGATURE_MAP[m.group(1)]
    trailing = m.group(2)
    return "%s%s" % (ligature, trailing)
+
def htmlize(s):
    """Turn a TeX string into good-looking HTML.

    The steps run in this order (later steps rely on earlier ones):

    1. "&" followed by a non-[a-z0-9] character becomes "&amp;".
    2. "\\i" followed by a non-[a-z0-9] character becomes a plain "i".
    3. Accent commands become HTML entities (see _unaccent).
    4. TeX URL escapes are undone (see unTeXescapeURL).
    5. Ligature commands become HTML entities (see _unlig_html).
    6. Every remaining TeX command is removed.
    7. All "{" and "}" characters are removed.
    8. "--" between two digits becomes a single "-".
    9. "---" becomes "&mdash;" and any remaining "--" becomes "&ndash;".

    Returns the converted string.
    """
    s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s)
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(_unaccent, s)
    s = unTeXescapeURL(s)
    s = RE_LIGATURE.sub(_unlig_html, s)
    s = RE_TEX_CMD.sub("", s)
    # Strip braces with str.replace: the two-argument translate(table,
    # deletechars) form only exists on Python 2 byte strings and raises
    # TypeError for Python 2 unicode and Python 3 str.
    s = s.replace("{", "").replace("}", "")
    s = RE_PAGE_SPAN.sub(lambda m: "%s-%s" % m.groups(), s)
    # "---" must be handled before "--", otherwise an em dash would be
    # turned into an en dash followed by a hyphen.
    s = s.replace("---", "&mdash;")
    s = s.replace("--", "&ndash;")
    return s
+
def smartJoin(*lst):
    """Equivalent to os.path.join, but handle "." and ".." entries a bit better.

    Only arguments that are exactly "." or ".." are treated specially;
    components that merely contain them (e.g. "../x") are passed through.

    * "." entries are dropped.
    * A ".." entry cancels the entry before it when that entry is a single
      plain path component.  It is kept when there is nothing before it,
      when the previous entry is itself "..", or when the previous entry is
      empty, has a drive prefix, or contains a path separator -- in those
      cases removing the previous entry would change the meaning of the
      path.
    * If nothing is left, "." is returned.

    The collapsing is purely textual (like os.path.normpath), so it does
    not account for symbolic links.

    Returns the joined path string.
    """
    separators = [sep for sep in (os.sep, os.altsep) if sep]
    parts = []
    for item in lst:
        if item == ".":
            continue
        if item == ".." and parts:
            prev = parts[-1]
            cancellable = (
                prev not in ("", "..")
                and not os.path.splitdrive(prev)[0]
                and not any(sep in prev for sep in separators)
            )
            if cancellable:
                # "name/.." refers to the directory containing "name".
                parts.pop()
                continue
        parts.append(item)
    if not parts:
        # os.path.join() needs at least one argument.
        return "."
    return os.path.join(*parts)
+
def _split(s, w=79, indent=8):
    """Re-wrap the text *s* into lines, indenting continuation lines.

    Every run of whitespace in *s* is first collapsed to a single space.
    While the remaining text is longer than the current width, it is broken
    at the last space found at an index from ``w - 1`` down to 21.  If
    there is no such space, the whole remainder is emitted as one
    (over-long) line.  After the first line has been emitted, the width is
    reduced by *indent* and each later line is prefixed with *indent*
    spaces.

    Parameters:
        s: the text to wrap.
        w: target line width.
        indent: number of spaces put in front of continuation lines.

    Returns the lines joined by newlines, with a trailing newline when
    there is any output; an empty *s* gives "".
    """
    lines = []
    s = re.sub(r"\s+", " ", s)
    first = True
    indentation = ""
    while len(s) > w:
        # range() instead of the Python 2-only xrange(): the span is tiny
        # and this keeps the function working on Python 3.
        for i in range(w - 1, 20, -1):
            if s[i] == ' ':
                lines.append(indentation + s[:i])
                s = s[i + 1:]
                break
        else:
            # No usable break point: give up and emit the rest as is.
            lines.append(indentation + s.strip())
            s = ""
        if first:
            # Later lines are indented, so they get less room for text.
            first = False
            w -= indent
            indentation = " " * indent
    if s:
        lines.append(indentation + s)
    # The trailing empty entry makes the joined result end with "\n".
    lines.append("")
    return "\n".join(lines)