1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
import re
import os
# Identity table: the characters with code points 0..255, in order.
ALLCHARS = "".join(map(chr,range(256)))

# An ampersand followed by anything other than a lowercase letter or digit,
# i.e. one that does not look like the start of a lowercase entity name.
RE_LONE_AMP = re.compile(r'&([^a-z0-9])')
# The TeX dotless-i command (\i) followed by a non-alphanumeric character.
RE_LONE_I = re.compile(r'\\i([^a-z0-9])')
# A TeX accent command: group 1 is the accent symbol, group 2 is the
# accented character, either bare ("e") or wrapped in braces ("{e}").
RE_ACCENT = re.compile(r'\\([\'`~^"c])([^{]|{.})')
# A TeX ligature/special-letter command and the character that follows it.
RE_LIGATURE = re.compile(r'\\(AE|ae|OE|oe|AA|aa|O|o|ss)([^a-z0-9])')

# TeX accent symbol -> suffix of the corresponding HTML entity name
# (e.g. "'" + "e" becomes "&eacute;").
ACCENT_MAP = {
    "'": 'acute',
    "`": 'grave',
    "~": 'tilde',
    "^": 'circ',
    '"': 'uml',
    "c": 'cedil',
}

# Overrides for the "&<char><accent>;" names that _unaccent builds, mapping
# them to numeric character references.  The key has to be spelled as the
# entity name, because that is the string _unaccent looks up.
UNICODE_MAP = {
    '&nacute;': '&#x0144;',
}

# TeX ligature command name -> HTML entity.  The values are kept ASCII-only
# so the module needs no source-encoding declaration and the generated HTML
# does not depend on the page's character set.
HTML_LIGATURE_MAP = {
    'AE': '&AElig;',
    'ae': '&aelig;',
    'OE': '&OElig;',
    'oe': '&oelig;',
    'AA': '&Aring;',
    'aa': '&aring;',
    'O': '&Oslash;',
    'o': '&oslash;',
    'ss': '&szlig;',
}

# Any TeX command: a backslash followed by letters/@, or by a single character.
RE_TEX_CMD = re.compile(r"(?:\\[a-zA-Z@]+|\\.)")
# A TeX-style "--" between two digits, as in a page range ("10--20").
RE_PAGE_SPAN = re.compile(r"(\d)--(\d)")
def url_untranslate(s):
    """Make a BibTeX key usable inside a URL.

    Each of the characters % < > ` # , space & _ ' ; is replaced by an
    underscore followed by its two-digit lowercase hex code, and every
    "/" is then turned into ":".
    """
    def _hex_escape(match):
        # "_" plus the character's code in hex, e.g. " " -> "_20".
        return "_%02x" % ord(match.group(1))

    escaped = re.sub(r'([%<>`#, &_\';])', _hex_escape, s)
    return escaped.replace("/", ":")
def txtize(s):
    """Turn a TeX string into decent plaintext.

    In order: the dotless-i command becomes a plain "i", accent commands
    are reduced to the bare accented letter, ligature commands are reduced
    to their letter names (e.g. "ae"), every remaining TeX command is
    removed, and finally all braces are dropped.
    """
    s = RE_LONE_I.sub(r"i\1", s)
    s = RE_ACCENT.sub(r"\2", s)
    s = RE_LIGATURE.sub(r"\1\2", s)
    s = RE_TEX_CMD.sub("", s)
    # Strip braces with replace(): the two-argument str.translate(table,
    # deletechars) form only exists for Python 2 byte strings and raises
    # TypeError on Python 2 unicode and on Python 3 str.
    s = s.replace("{", "").replace("}", "")
    return s
def unTeXescapeURL(s):
    """Turn a URL as formatted in TeX into a real URL.

    Unescapes "\\_" to "_", and removes discretionary hyphens ("\\-") and
    empty groups (both "\\{}" and "{}").
    """
    s = s.replace("\\_", "_")
    s = s.replace("\\-", "")
    # The backslash is doubled explicitly: "\{" is not a valid string escape
    # and only worked because Python passes unknown escapes through (newer
    # versions warn about it).
    s = s.replace("\\{}", "")
    s = s.replace("{}", "")
    return s
def TeXescapeURL(s):
    """Escape a URL for use in TeX.

    Underscores become "\\_" and each tilde is prefixed with "\\{}".
    """
    s = s.replace("_", "\\_")
    # The backslash is doubled explicitly: "\{" is not a valid string escape
    # and only worked because Python passes unknown escapes through (newer
    # versions warn about it).
    s = s.replace("~", "\\{}~")
    return s
def _unaccent(m):
    """Substitution callback for RE_ACCENT: build the HTML entity.

    Group 1 of the match is the TeX accent symbol and group 2 the accented
    character, optionally wrapped in braces.  The result is
    "&<char><accent-name>;" unless UNICODE_MAP supplies a replacement for
    that exact string.
    """
    accent = m.group(1)
    letter = m.group(2)
    if letter[0] == '{':
        # Braced form "{x}": the character itself is at index 1.
        letter = letter[1]
    entity = "&%s%s;" % (letter, ACCENT_MAP[accent])
    return UNICODE_MAP.get(entity, entity)
def _unlig_html(m):
    """Substitution callback for RE_LIGATURE.

    Returns the HTML_LIGATURE_MAP replacement for the command name
    (group 1) followed by the character that came after it (group 2).
    """
    ligature = HTML_LIGATURE_MAP[m.group(1)]
    follower = m.group(2)
    return "%s%s" % (ligature, follower)
def htmlize(s):
    """Turn a TeX string into good-looking HTML.

    The order of the steps matters: ampersands are escaped first so that
    the entities inserted by the later steps are not escaped again.
    """
    # A TeX-escaped ampersand is a literal "&".  Convert it before the
    # generic command stripper below sees "\&" and eats the ampersand.
    s = s.replace("\\&", "&amp;")
    # Escape ampersands that do not look like the start of an entity.
    s = RE_LONE_AMP.sub(r"&amp;\1", s)
    s = RE_LONE_I.sub(r"i\1", s)
    s = RE_ACCENT.sub(_unaccent, s)
    s = unTeXescapeURL(s)
    s = RE_LIGATURE.sub(_unlig_html, s)
    s = RE_TEX_CMD.sub("", s)
    # Strip braces with replace(): the two-argument str.translate(table,
    # deletechars) form only exists for Python 2 byte strings and raises
    # TypeError on Python 2 unicode and on Python 3 str.
    s = s.replace("{", "").replace("}", "")
    # "12--34" page ranges get a plain hyphen; this must run before the
    # generic dash replacements below.
    s = RE_PAGE_SPAN.sub(r"\1-\2", s)
    # Longest dash first, so "---" is not consumed as "--" plus "-".
    s = s.replace("---", "&mdash;")
    s = s.replace("--", "&ndash;")
    return s
def smartJoin(*lst):
    """Equivalent to os.path.join, but handle "." and ".." entries a bit
    better.

    Every "." component is dropped.  A ".." component is kept only when it
    is the first remaining component; any later ".." is dropped.

    Returns "." when no components remain (for example when every argument
    was "."), instead of letting os.path.join() fail on an empty argument
    list.
    """
    parts = [item for item in lst if item != "."]
    # NOTE(review): a non-leading ".." is discarded without also removing
    # the component before it, so ("a", "..", "b") joins to "a/b".  This
    # keeps the existing behaviour; confirm it is what callers expect.
    parts = [item for pos, item in enumerate(parts)
             if pos == 0 or item != ".."]
    if not parts:
        return "."
    return os.path.join(*parts)
def _split(s,w=79,indent=8):
    """Word-wrap s and return the lines joined by newlines.

    Runs of whitespace are first collapsed to single spaces.  The first
    line is at most w characters wide; every following line is prefixed
    with `indent` spaces and wrapped so that the total width stays within
    w.  A line is only broken at a space found at index 21 or later; if
    there is none, the whole remaining text is emitted on one line.  The
    result ends with a newline unless s is empty.
    """
    lines = []
    s = re.sub(r"\s+", " ", s)
    indentation = ""
    first = True
    while len(s) > w:
        # Right-most space in s[21:w].  The guard keeps a negative w from
        # being read by rfind() as an offset from the end of the string.
        cut = s.rfind(" ", 21, w) if w > 21 else -1
        if cut < 0:
            # No usable break point: emit the rest as one line and stop.
            # Without the break this loop never ends once w is negative
            # (e.g. indent > w), because len("") > w stays true.
            lines.append(indentation + s.strip())
            s = ""
            break
        lines.append(indentation + s[:cut])
        s = s[cut+1:]
        if first:
            # Continuation lines are indented, so less width is available.
            first = False
            w -= indent
            indentation = " " * indent
    if s:
        lines.append(indentation + s)
    # The empty last element gives the joined text its trailing newline.
    lines.append("")
    return "\n".join(lines)
|