More work; get by-date working, cleanups, debugging, etc

svn:r9
author: Nick Mathewson <nickm@torproject.org> 2003-05-19 18:09:21 +0000
committer: Nick Mathewson <nickm@torproject.org> 2003-05-19 18:09:21 +0000
commit: 6e23d475308c780f0312795b201d7d44143d0f0e (patch)
tree: 44fc36f6c03f23b6fda535f80eb5b3b05dd5ba49
parent: c07e917777582ff1e70e8b57345397ef977d684e (diff)
download: anonbib-6e23d475308c780f0312795b201d7d44143d0f0e.tar.gz
6 files changed, 174 insertions, 42 deletions
diff --git a/.cvsignore b/.cvsignore
index 52e4e61..d13d323 100644
--- a/.cvsignore
+++ b/.cvsignore
@@ -1,2 +1,4 @@
 *.pyc
 *.pyo
+date.html
+topic.html
diff --git a/BibTeX.py b/BibTeX.py
index 9444a65..9b0d38c 100644
--- a/BibTeX.py
+++ b/BibTeX.py
@@ -10,6 +10,10 @@ __all__ = ( 'ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize',
             'ParsedAuthor', 'FileIter', 'Parser', 'parseFile',
             'splitBibTeXEntriesBy',
             'sortBibTexEntriesBy', )
+
+MONTHS = [ None,
+           "January", "February", "March", "April", "May", "June",
+           "July", "August", "September", "October", "November", "December"]
             
 class ParseError(Exception):
     pass
@@ -34,7 +38,6 @@ class BibTeX:
                     cr = self.byKey[ent['crossref'].lower()]
                 except KeyError:
                     print "No such crossref: %s", ent['crossref']
-                    print ent
                     break
                 if seen.get(cr.key):
                     raise ParseError("Circular crossref at %s" % ent.key)
@@ -60,10 +63,54 @@ def splitEntriesBy(entries, field):
             result[key] = [ent]
     return result
 
-def sortEntriesBy(self, field):
-    tmp = [ (ent.get(field), ent) for ent in entries ]
+def splitSortedEntriesBy(entries, field):
+    """Group consecutive entries sharing the same ent.get(field) value.
+       entries: list already sorted so that equal values are adjacent.
+       Returns a list of (value, [list of entries]), in input order."""
+    result = []
+    for ent in entries:
+        key = ent.get(field)
+        # Compare with the last group instead of a magic sentinel string.
+        if result and key == result[-1][0]:
+            result[-1][1].append(ent)
+        else:
+            result.append((key, [ent]))
+    return result
+
+def sortEntriesBy(entries, field, default):
+    """Return entries sorted by txtize(ent.get(field, default)).
+       Ties fall back to comparing the entries themselves (tuple sort)."""
+    tmp = [ (txtize(ent.get(field, default)), ent) for ent in entries ]
+    tmp.sort()
+    return [ t[1] for t in tmp ]
+
+def sortEntriesByAuthor(entries):
+    """Return entries ordered by their tuple of txtize()d author names,
+       each name joined in von, last, first, jr order."""
+    keyed = [ (tuple([ txtize(" ".join(au.von+au.last+au.first+au.jr))
+                       for au in ent.parsedAuthor ]), ent)
+              for ent in entries ]
+    keyed.sort()
+    return [ pair[1] for pair in keyed ]
+
+def sortEntriesByDate(entries):
+    tmp = []
+    for ent in entries:
+        try:
+            mon = MONTHS.index(ent.get("month"))
+        except ValueError:
+            print "Unknown month %r in %s"%(ent.get("month"), ent.key)
+            mon = 0
+        # Key: year*13 + MONTHS index (0 = None/unknown month, 1-12 named).
+        try:
+            date = int(ent['year'])*13 + mon
+        except KeyError:
+            print "ERROR: No year field in %s"%ent.key
+            date = 10000*13  # sorts after every year below 10000
+        tmp.append((date, ent))
     tmp.sort()
-    return [ t[2] for t in tmp ]
+    return [ t[1] for t in tmp ]
+    
 
 DISPLAYED_FIELDS = [ 'title', 'author', 'journal', 'booktitle',
 'school', 'institution', 'organization', 'volume', 'number', 'year',
@@ -287,6 +334,14 @@ def htmlize(s):
     s = RE_PAGE_SPAN.sub(lambda m: "%s-%s"%(m.groups()), s)
     return s
 
+def txtize(s):  # makes the sort keys for sortEntriesBy/sortEntriesByAuthor
+    s = RE_LONE_I.sub(lambda m: "%s" % m.group(1), s)  # keep group 1 only
+    s = RE_ACCENT.sub(lambda m: "%s" % m.group(2), s)  # keep group 2 only
+    s = RE_TEX_CMD.sub("", s)  # drop every RE_TEX_CMD match
+    s = s.translate(ALLCHARS, "{}")  # delete all '{' and '}'
+    return s
+    
+
 PROCEEDINGS_RE = re.compile(
                         r'((?:proceedings|workshop record) of(?: the)? )(.*)',
                         re.I)
diff --git a/_template_.html b/_template_.html
index 7dafac9..ae9b05e 100644
--- a/_template_.html
+++ b/_template_.html
@@ -70,13 +70,12 @@ width="1" height="1" alt=""></td>
 -->
 
 <h1 align="center">Anonymity bibliography</h1>
-%(choices)s
-<p align="center">By subject | <a href="/cgi-bin/pubs-date.cgi">By date</a></p>
+<p align="center">%(choices)s</p>
 
-<table cellspacing="0" cellpadding="0" border="0" width="100%">
+<table cellspacing="0" cellpadding="0" border="0" width="100%%">
 
 <tr valign="top">
-<td width="10%" height="24"><br></td>
+<td width="10%%" height="24"><br></td>
 </tr>
 
 <tr valign="top">
@@ -94,11 +93,6 @@ width="1" height="1" alt=""></td>
 <td rowspan="5" width="1" bgcolor="#92a6a4"><img src="/img/emptydot.gif"
 width="1" height="1" alt=""></td>
 
-<td bgcolor="#ccffff"><p class="l1"><br><form action="/cgi-bin/pubs-date.cgi"
-method="get"><strong>Publication search:</strong><br>
-<small><input type=entry name=match size=15> <input type=submit
-value="Go"><br></small></form></p></td>
-
 <td width="8" bgcolor="#ccffff"><img src="/img/emptydot.gif"
 width="8" height="1" alt=""></td>
 <td rowspan="3" width="1" bgcolor="#92a6a4"><img src="/img/emptydot.gif"
@@ -114,7 +108,7 @@ width="1" height="1" alt=""></td>
 
 <tr valign="top">
 <td bgcolor="#ccffff"><p class="l1"><br><form action="/cgi-bin/pubs-date.cgi"
-method="get"><strong>Subjects:</strong><br>
+method="get"><strong>%(sectiontypes)s:</strong><br>
 
 %(sections)s
 
@@ -139,7 +133,7 @@ width="1" height="1" alt=""></td>
 </table>
 </div></td>
 
-<td width="75%">
+<td width="75%%">
 
 <h2>Publications by %(field)s</h2>
 
@@ -149,7 +143,7 @@ width="1" height="1" alt=""></td>
 
 </td>
 
-<td width="15%"><br></td>
+<td width="15%%"><br></td>
 
 </tr>
 </table>
diff --git a/config.py b/config.py
index cf7a4b2..7138fdb 100644
--- a/config.py
+++ b/config.py
@@ -1,6 +1,10 @@
 
 import re
 
+MASTER_BIB = "./anonbib.bib"
+
+OUTPUT_DIR = "."
+
 AUTHOR_URLS = {
     'Berthold' : 'http://page.inf.fu-berlin.de/~berthold/',
     'Miguel.*Castro' : 'http://research.microsoft.com/users/mcastro/',
@@ -21,6 +25,7 @@ AUTHOR_URLS = {
     'David.*Wagner' : 'http://www.cs.berkeley.edu/~daw/',
     'Shoup' : 'http://www.shoup.net/',
     'B.*M&ouml;ller' : 'http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html',
+    'Michael.*Freedman' : 'http://www.scs.cs.nyu.edu/~mfreed/',
     
     }
 
@@ -35,6 +40,7 @@ INITIAL_STRINGS = {
 
 OMIT_ENTRIES = ("proceedings", "journal")
 
+
 ### Don't edit below this line
 
 AUTHOR_RE_LIST = [
diff --git a/reconcile.py b/reconcile.py
new file mode 100644
index 0000000..8d5063a
--- /dev/null
+++ b/reconcile.py
@@ -0,0 +1,2 @@
+#!/usr/bin/python
+
diff --git a/writeHTML.py b/writeHTML.py
index 0d2d41a..8037abd 100644
--- a/writeHTML.py
+++ b/writeHTML.py
@@ -1,40 +1,113 @@
 #!/usr/bin/python
 
+import re
+
 import BibTeX
 import config
 
-bib = BibTeX.parseFile("anonbib.bib")
+TEMPLATE_S, TEMPLATE_E = None, None
 
-f = open("_template_.html")
-template = f.read()
-f.close()
+def getTemplate():
+    """Return cached (header, footer) of _template_.html around %(entries)s."""
+    global TEMPLATE_S, TEMPLATE_E
+    if not TEMPLATE_S:
+        src = open("_template_.html")
+        text = src.read()
+        src.close()
+        TEMPLATE_S, TEMPLATE_E = text.split("%(entries)s")
+    return TEMPLATE_S, TEMPLATE_E
+
+def url_untranslate(s):
+    """Make s safe as an anchor name: %xx-escape markup chars, ' ' -> '+'."""
+    s = re.sub(r'''([%<>"'&#+])''',
+               lambda m: "%%%02x"%ord(m.group(1)), s)
+    s = s.replace(" ", "+")  # after escaping, so a literal '+' stays %2b
+    return s
 
-f = open("z.html", 'w')
+def writeBody(f, sections):
+    '''Write every section to the open file f: an anchored <h2>, then a <ul>.
+       sections: list of (sectionname, [list of BibTeXEntry])'''
+    for title, members in sections:
+        print >>f, '<h2><a name="%s">%s</a></h2>' % (url_untranslate(title), title)
+        print >>f, "<ul class='expand'>"
+        for bibent in members:
+            print >>f, bibent.to_html()
+        print >>f, "</ul>"
 
-template_s, template_e = template.split("%(entries)s")
+def writeHTML(f, sections, sectionType, fieldName, choices):
+    """f: an open file; gets the page header, section bodies, then footer.
+       sections: list of (sectionname, [list of BibTeXEntry])
+       sectionType: str, label above the section links; fieldName: str
+       choices: list of (choice, url); a false url leaves choice unlinked"""
+    # One link per section, aimed at the anchor name writeBody() emits.
+    secStr = []
+    for s, _ in sections:
+        secStr.append("<p class='l2'><a href='#%s'>%s</a></p>\n"%
+                      ((url_untranslate(s),s)))
+    secStr = "".join(secStr)
+    
+    # View chooser; _template_.html already wraps %(choices)s in a <p>.
+    choiceStr = []
+    for choice, url in choices:
+        if url:
+            choiceStr.append("<a href='%s'>%s</a>"%(url, choice))
+        else:
+            choiceStr.append(choice)
+        
+    choiceStr = " | ".join(choiceStr)
 
-print >>f, template_s
+    fields = { 'command_line' :  "",
+               'sectiontypes' :  sectionType,
+               'choices' : choiceStr,
+               'field': fieldName,
+               'sections' : secStr,
+         }
 
-entries = BibTeX.splitEntriesBy(bib.entries, "www_section")
-sections = entries.keys()
-sections.sort()
-if entries.has_key(None):
-    for ent in entries[None]:
-        ent['www_section'] = "Miscellaneous"
+    header, footer = getTemplate()
+    print >>f, header%fields
+    writeBody(f, sections)
+    print >>f, footer%fields
+    
+bib = BibTeX.parseFile(config.MASTER_BIB)
 
-    entries["Miscellaneous"] = entries[None]
-    del entries[None]
-    sections.append("Miscellaneous")
-    sections = filter(None, sections)
+##### Sorted views:
 
-for s in sections:
-    #XXX print '<h3><a name="', url_untranslate($section), '">';
-    print >>f, '<h3>%s</h3>'%s
-    print >>f, "<ul class='expand'>"
-    for e in entries[s]:
-        print >>f, e.to_html()
-    print >>f, "</ul>"
-                     
+## By topic.
 
-print >>f, template_e
+entries = BibTeX.sortEntriesBy(bib.entries, "www_section", "ZZZZZZZZZZZZZZZZZ")
+entries = BibTeX.splitSortedEntriesBy(entries, "www_section")
+if entries[-1][0] is None:
+    entries[-1] = ("Miscellaneous", entries[-1][1])
+
+entries = [ (s, BibTeX.sortEntriesByAuthor(ents))
+            for s, ents in entries
+            ]
+
+f = open("topic.html", 'w')
+writeHTML(f, entries, "Topics", "topic",
+          (("By topic", None),
+           ("By date", "./date.html")))
+f.close()
+
+## By date.
+
+entries = BibTeX.sortEntriesByDate(bib.entries)
+entries = BibTeX.splitSortedEntriesBy(entries, 'year')
+if entries[-1][0] is None:  # entries with no year end up in the last group
+    entries[-1] = ("Unknown", entries[-1][1])
+sections = [ ent[0] for ent in entries ]  # NOTE(review): not used below
+
+first_year = int(entries[0][1][0]['year'])
+last_year = int(entries[-1][1][0].get('year',
+                                      entries[-2][1][0]['year']))
+years = map(str, range(first_year, last_year+1))  # NOTE(review): unused
+if entries[-1][0] == 'Unknown':
+    years.append("Unknown")
+
+f = open("date.html", 'w')
+writeHTML(f, entries, "Years", "date",
+          (("By topic", "./topic.html"),
+           ("By date", None)))
+f.close()
 
+## The big BibTeX
author	Nick Mathewson <nickm@torproject.org>	2003-05-19 18:09:21 +0000
committer	Nick Mathewson <nickm@torproject.org>	2003-05-19 18:09:21 +0000
commit	6e23d475308c780f0312795b201d7d44143d0f0e (patch)
tree	44fc36f6c03f23b6fda535f80eb5b3b05dd5ba49
parent	c07e917777582ff1e70e8b57345397ef977d684e (diff)
download	anonbib-6e23d475308c780f0312795b201d7d44143d0f0e.tar.gz