Initial revision

svn:r2
author: Nick Mathewson <nickm@torproject.org> 2003-05-17 06:10:20 +0000
committer: Nick Mathewson <nickm@torproject.org> 2003-05-17 06:10:20 +0000
commit: 1d07a97300d48872726edd989f53bf489dc00a41 (patch)
tree: baa19dd713b183d6694364251b6e17afe9195b0f /perl/BibTeX.pm
parent: 11fddbc0273c37d651399ec782c57065f8030a76 (diff)
download: anonbib-1d07a97300d48872726edd989f53bf489dc00a41.tar.gz
1 files changed, 301 insertions, 0 deletions
diff --git a/perl/BibTeX.pm b/perl/BibTeX.pm
new file mode 100644
index 0000000..35d3467
--- /dev/null
+++ b/perl/BibTeX.pm
@@ -0,0 +1,301 @@
+package BibTeX;
+use Symbol 'qualify_to_ref';
+
+%bibtex_prototypes = ('string' => 'p', 'preamble' => 'v', '_' => 'kp*');
+
+sub parse_bibtex_key ($) {
+  my($fh) = @_;
+  $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh);
+  if (/^\s*([^"#%'(),={}\s]+)(.*)/s) {
+    $_ = $2;
+    lc($1);
+  } else {
+    print STDERR "no key at line $.\n";
+    "";
+  }
+}
+
+sub parse_bibtex_value ($$) {
+  my($fh, $strings) = @_;
+  my($data) = "";
+  my($bracelevel, $line);
+  
+  # loop over concatenation
+  while (1) {
+    
+    # loop over lines
+    $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh);
+    s/^\s+//;
+    if (eof $fh) {
+      print STDERR "unexpected end of file\n";
+      return $data;
+    }
+    
+    # check type of thing
+    if (/^\"(.*)/s) {
+      $_ = $1;
+      $bracelevel = 0;
+      $line = $.;
+      while (1) {
+	if (!$bracelevel && /^([^{}\"]*)\"(.*)/s) {
+	  $data .= $1;
+	  $_ = $2;
+	  last;
+	} elsif ($bracelevel && /^([^{}]*\})(.*)/s) {
+	  $data .= $1;
+	  $_ = $2;
+	  $bracelevel--;
+	} elsif (/^([^{}]*\{)(.*)/s) {
+	  $data .= $1;
+	  $_ = $2;
+	  $bracelevel++;
+	} else {
+	  $data .= $_;
+	  die "end of file within quotes started at line $line" if eof $fh;
+	  $_ = <$fh>;
+	}
+      }
+      
+    } elsif (/^\{(.*)/s) {
+      $_ = $1;
+      $bracelevel = 1;
+      $line = $.;
+      while ($bracelevel) {
+	if (/^([^{}]*)\}(.*)/s) {
+	  $data .= $1;
+	  $data .= "}" if $bracelevel > 1;
+	  $_ = $2;
+	  $bracelevel--;
+	} elsif (/^([^{}]*\{)(.*)/s) {
+	  $data .= $1;
+	  $_ = $2;
+	  $bracelevel++;
+	} else {
+	  $data .= $_;
+	  die "end of file within braces started at line $line" if eof $fh;
+	  $_ = <$fh>;
+	}
+      }
+      
+    } elsif (/^\#/) {
+      # do nothing
+      print STDERR "warning: odd concatenation at line $.\n";
+    } elsif (/^[\},]/) {
+      print STDERR "no data after field at line $.\n" if $data eq '';
+      return $data;
+    } elsif (/^([^\s\},]+)(.*)/s) {
+      if ($strings->{lc($1)}) {
+	$data .= $strings->{lc($1)};
+      } else {
+	$data .= $1;
+      }
+      $_ = $2;
+    }
+    
+    # got a single string, check for concatenation
+    $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh);
+    s/^\s+//;
+    if (/^\#(.*)/s) {
+      $_ = $1;
+    } else {
+      return $data;
+    }
+  }
+}
+
+sub parse_bibtex_entry ($$$$) {
+  # uses caller's $_
+  my($fh, $name, $strings, $entries) = @_;
+  my($entryline) = $.;
+  
+  $_ = <$fh> while /^\s+$/ && !eof $fh;
+  if (/^\s*\{(.*)/s) {
+    $_ = $1;
+  } else {
+    print STDERR "no open brace after \@$name starting at line $entryline\n";
+    return [];
+  }
+  
+  # get prototype
+  my($prototype) = $bibtex_prototypes{$name};
+  $prototype = $bibtex_prototypes{'_'} if !defined $prototype;
+  
+  # parse entry into `@v'
+  my(@v, $a, $b);
+  while (!eof $fh) {
+    $_ = <$fh> while /^\s*$/ && !eof $fh;
+    if (/^\s*\}(.*)/s) {
+      $_ = $1;
+      last;
+    } elsif ($prototype =~ /^k/) {
+      push @v, parse_bibtex_key($fh);
+    } elsif ($prototype =~ /^v/) {
+      push @v, parse_bibtex_value($fh, $strings);
+    } elsif ($prototype =~ /^p/) {
+      push @v, parse_bibtex_key($fh);
+      $_ = <$fh> while /^\s+$/ && !eof $fh;
+      s/^\s+\=?//;
+      push @v, parse_bibtex_value($fh, $strings);
+    }
+    $_ = <$fh> while /^\s*$/ && !eof $fh;
+    s/^\s*,?//;
+    $prototype = substr($prototype, 1)
+	if $prototype && $prototype !~ /^.\*/;
+  }
+  print STDERR "missing args to \@$name at line $.\n"
+      if $prototype && $prototype !~ /^.\*/;
+  
+  # do something with entry
+  if ($name eq 'string') {
+    $strings->{$v[0]} = $v[1];
+  } elsif ($name eq 'preamble') {
+    # do nothing
+  } else {
+    my($key) = shift @v;
+    $entries->{$key} = {@v};
+    $entries->{$key}->{'_type'} = $name;
+    $entries->{$key}->{'_key'} = $key;
+    push @{$entries->{'_'}}, $key;
+  }
+}
+
+sub parse (*;\%) {
+  my($fh) = qualify_to_ref(shift, caller);
+  my($initial_strings) = @_;
+  my($strings) = $initial_strings;
+
+  my($curname, $garbage, %entries);
+  local($_) = '';
+  while (<$fh>) {
+    
+    if (/^\s*[%\#]/ || /^\s*$/) {
+      # comment
+      
+    } elsif (/^\s*\@([^\s\"\#%\'(),={}]+)(.*)/s) {
+      $curname = lc($1);
+      $_ = $2;
+      parse_bibtex_entry($fh, $curname, $strings, \%entries);
+      
+    } else {
+      print STDERR "garbage at line $.\n" if !defined $garbage;
+      $garbage = 1;
+    }
+  }
+
+  \%entries;
+}
+
+sub expand ($$) {
+  my($e, $key) = @_;
+  my(%d) = %{$e->{$key}};
+  while ($d{'crossref'}) {
+    my($v) = $d{'crossref'};
+    delete $d{'crossref'};
+    %d = (%{$e->{$v}}, %d);
+  }
+  \%d;
+}
+
+
+sub split_von ($$$@) {
+  my($f, $v, $l, @x) = @_;
+  my(@pre, $t, $in_von, $tt);
+  while (@x) {
+    $t = $tt = shift @x;
+    if ($tt =~ /^\{\\/) {
+      $tt =~ s/\\[A-Za-z@]+//g;
+      $tt =~ s/\\.//g;
+      $tt =~ tr/{}//d;
+    }
+    if ($tt =~ /^[a-z]/) {
+      push @$v, $t;
+      $in_von = 1;
+    } elsif ($in_von || !ref($f)) {
+      push @$l, $t, @x;
+      return;
+    } else {
+      push @$f, $t;
+    }
+  }
+  if (!$in_von) {
+    push @$l, (pop @$f);
+  }
+}
+
+sub parse_author ($) {
+  local($_) = @_[0];
+  my(@x) = ();
+  my($pos, $pos0, $t, $bracelevel);
+  
+  # move text into @x
+  while (!/^\s*$/) {
+    s/^\s+//;
+    $pos = 0;
+    while ($pos < length) {
+      $t = substr($_, $pos, 1);
+      if ($t eq '{') {
+	$bracelevel++;
+      } elsif ($t eq '}') {
+	$bracelevel--;
+      } elsif ($bracelevel <= 0) {
+	last if ($t =~ /[\s,]/);
+      }
+      $pos++;
+    }
+    
+    push @x, substr($_, 0, $pos);
+    if ($t eq ',') {
+      push @x, ',';
+      $pos++;
+    }
+    $_ = substr($_, $pos);
+  }
+  
+  # split @x into arrays based on `and'
+  my(@aa) = ([]);
+  foreach $t (@x) {
+    if ($t eq 'and') {
+      push @aa, [] if @{$aa[-1]} > 0;
+    } else {
+      push @{$aa[-1]}, $t;
+    }
+  }
+  
+  # massage each subarray into four parts: first, von, last, jr
+  my(@aaa) = ();
+  foreach $t (@aa) {
+    my(@fvl, @vl, @v, @l, @f, @j, $cur, $commas);
+    $cur = \@fvl; $commas = 0;
+    
+    # split into subarrays if possible
+    foreach $x (@$t) {
+      if ($x eq ',') {
+	if ($commas == 0) {
+	  @vl = @fvl;
+	  @fvl = ();
+	  $cur = \@f;
+	} else {
+	  push @j, @f;
+	  @f = ();
+	}
+	$commas++;
+      } else {
+	push @$cur, $x;
+      }
+    }
+    
+    # split out the `von' part
+    if ($commas == 0) {
+      split_von(\@f, \@v, \@l, @fvl);
+    } else {
+      split_von(0, \@v, \@l, @vl);
+    }
+    
+    # store as an array of arrays
+    push @aaa, [[@f], [@v], [@l], [@j]];
+  }
+  
+  @aaa;
+}
+
+1;
author	Nick Mathewson <nickm@torproject.org>	2003-05-17 06:10:20 +0000
committer	Nick Mathewson <nickm@torproject.org>	2003-05-17 06:10:20 +0000
commit	1d07a97300d48872726edd989f53bf489dc00a41 (patch)
tree	baa19dd713b183d6694364251b6e17afe9195b0f /perl/BibTeX.pm
parent	11fddbc0273c37d651399ec782c57065f8030a76 (diff)
download	anonbib-1d07a97300d48872726edd989f53bf489dc00a41.tar.gz