import os
import re

# Tab, newline, carriage return and the printable ASCII range 32-126.
PRINTINGCHARS = "\t\n\r" + "".join(map(chr, range(32, 127)))
# Every character with code point 0-255, in order.
ALLCHARS = "".join(map(chr, range(256)))

# "&" followed by a character that is not a lowercase letter or digit.
RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
# TeX dotless-i command "\i" followed by a non-alphanumeric character.
RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
# TeX accent command followed by a bare character or a one-character group.
RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
# TeX ligature / special-letter command followed by a terminating character.
RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')

# TeX accent character -> suffix of the matching HTML entity name.
ACCENT_MAP = {
    "'": 'acute',
    "`": 'grave',
    "~": 'tilde',
    "^": 'circ',
    '"': 'uml',
    "c": 'cedil',
}

# Overrides applied to the entity string built by _unaccent().  Keys must
# therefore be spelled as entities ("&nacute;"), not as bare characters.
UNICODE_MAP = {
    '&nacute;': 'ń',
}

# TeX ligature command name -> replacement emitted by htmlize().
# NOTE(review): the values are literal non-ASCII characters, so the generated
# HTML presumably has to be served in a Unicode encoding -- confirm.
HTML_LIGATURE_MAP = {
    'AE': 'Æ',
    'ae': 'æ',
    'OE': 'Œ',
    'oe': 'œ',
    'AA': 'Å',
    'aa': 'å',
    'O': 'Ø',
    'o': 'ø',
    'ss': 'ß',
}

# Any TeX control word (\name) or control symbol (\ plus one character).
RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
# A double-hyphen page range between two digits, e.g. "12--34".
RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")


def _strip_braces(s):
    """Return s with every "{" and "}" removed."""
    # str.replace is used instead of the two-argument str.translate form,
    # which only exists for Python 2 byte strings.
    return s.replace("{", "").replace("}", "")


def url_untranslate(s):
    """Change a BibTeX key into a string suitable for use in a URL.

    Each of the characters % < > ` # , space & _ ' ; is replaced by "_"
    followed by its two-digit lowercase hex code, and "/" becomes ":".
    """
    s = re.sub(r'([%<>`#, &_\';])',
               lambda m: "_%02x" % ord(m.group(1)), s)
    s = s.replace("/", ":")
    return s


def txtize(s):
    """Turn a TeX string into decent plaintext.

    Accent commands are reduced to the bare letter, ligature commands to
    their ASCII spelling, remaining TeX commands are dropped and braces
    are removed.
    """
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)
    s = RE_LIGATURE.sub(lambda m: "%s%s" % m.groups(), s)
    s = RE_TEX_CMD.sub("", s)
    s = _strip_braces(s)
    return s


def unTeXescapeURL(s):
    """Turn a URL as formatted in TeX into a real URL."""
    s = s.replace("\\_", "_")
    s = s.replace("\\-", "")
    # "\{}" must go before the bare "{}" so no stray backslash is left behind.
    s = s.replace("\\{}", "")
    s = s.replace("{}", "")
    return s


def TeXescapeURL(s):
    """Escape a URL for use in TeX."""
    s = s.replace("_", "\\_")
    s = s.replace("~", "\\{}~")
    return s


def _unaccent(m):
    """Convert one RE_ACCENT match into an HTML entity.

    The entity is "&<char><accent-name>;"; UNICODE_MAP may override it.
    """
    accent, char = m.groups()
    if char[0] == '{':
        # "{x}" form: keep only the character inside the braces.
        char = char[1]
    accented = "&%s%s;" % (char, ACCENT_MAP[accent])
    return UNICODE_MAP.get(accented, accented)


def _unlig_html(m):
    """Convert one RE_LIGATURE match, keeping the character that ended it."""
    return "%s%s" % (HTML_LIGATURE_MAP[m.group(1)], m.group(2))


def htmlize(s):
    """Turn a TeX string into good-looking HTML.

    Lone ampersands are escaped, accent and ligature commands are
    converted, other TeX commands and braces are dropped, digit--digit
    page ranges get a single hyphen, and "---" / "--" become em / en
    dashes.
    """
    # Escape ampersands first, before any entities are generated below.
    s = RE_LONE_AMP.sub(lambda m: "&amp;%s" % m.group(1), s)
    s = RE_LONE_I.sub(lambda m: "i%s" % m.group(1), s)
    s = RE_ACCENT.sub(_unaccent, s)
    s = unTeXescapeURL(s)
    s = RE_LIGATURE.sub(_unlig_html, s)
    s = RE_TEX_CMD.sub("", s)
    s = _strip_braces(s)
    # Page ranges are collapsed before the generic dash replacements so that
    # "12--34" does not turn into an en dash.
    s = RE_PAGE_SPAN.sub(lambda m: "%s-%s" % (m.groups()), s)
    s = s.replace("---", "—")
    s = s.replace("--", "–")
    return s


def smartJoin(*lst):
    """Equivalent to os.path.join, but handle "." and ".." entries a bit
    better.

    Every "." entry is dropped, and so is every ".." entry that is not the
    first remaining one.  The component before a dropped ".." is kept, so
    smartJoin("a", "..", "b") is os.path.join("a", "b").

    Raises TypeError (from os.path.join) when no entries remain.
    """
    kept = []
    for item in lst:
        if item == ".":
            continue
        if item == ".." and kept:
            continue
        kept.append(item)
    return os.path.join(*kept)


def _split(s, w=79, indent=8):
    """Collapse whitespace in s and wrap it into lines.

    The text is broken at the last space found at an index from 21 up to
    w - 1.  After the first break, the width shrinks by `indent` and each
    following line is prefixed with `indent` spaces.  If no such space
    exists, the rest of the text is emitted unbroken on one line.  The
    lines are joined with newlines; any non-empty result ends in a newline.
    """
    lines = []
    s = re.sub(r"\s+", " ", s)
    is_first = True
    prefix = ""
    while len(s) > w:
        # Guard on w: a negative end index would be taken relative to the
        # end of the string instead of giving an empty search window.
        cut = s.rfind(" ", 21, w) if w > 21 else -1
        if cut >= 0:
            lines.append(prefix + s[:cut])
            s = s[cut + 1:]
        else:
            lines.append(prefix + s.strip())
            s = ""
        if is_first:
            is_first = False
            w -= indent
            prefix = " " * indent
    if s:
        lines.append(prefix + s)
    lines.append("")
    return "\n".join(lines)