aboutsummaryrefslogtreecommitdiffstats
path: root/perl/BibTeX.pm
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2003-05-17 06:10:20 +0000
committerNick Mathewson <nickm@torproject.org>2003-05-17 06:10:20 +0000
commit1d07a97300d48872726edd989f53bf489dc00a41 (patch)
treebaa19dd713b183d6694364251b6e17afe9195b0f /perl/BibTeX.pm
parent11fddbc0273c37d651399ec782c57065f8030a76 (diff)
downloadanonbib-1d07a97300d48872726edd989f53bf489dc00a41.tar.gz
Initial revision
svn:r2
Diffstat (limited to 'perl/BibTeX.pm')
-rw-r--r--perl/BibTeX.pm301
1 files changed, 301 insertions, 0 deletions
diff --git a/perl/BibTeX.pm b/perl/BibTeX.pm
new file mode 100644
index 0000000..35d3467
--- /dev/null
+++ b/perl/BibTeX.pm
@@ -0,0 +1,301 @@
+package BibTeX;
+use Symbol 'qualify_to_ref';
+
+%bibtex_prototypes = ('string' => 'p', 'preamble' => 'v', '_' => 'kp*');
+
+sub parse_bibtex_key ($) {
+ my($fh) = @_;
+ $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh);
+ if (/^\s*([^"#%'(),={}\s]+)(.*)/s) {
+ $_ = $2;
+ lc($1);
+ } else {
+ print STDERR "no key at line $.\n";
+ "";
+ }
+}
+
+sub parse_bibtex_value ($$) {
+ my($fh, $strings) = @_;
+ my($data) = "";
+ my($bracelevel, $line);
+
+ # loop over concatenation
+ while (1) {
+
+ # loop over lines
+ $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh);
+ s/^\s+//;
+ if (eof $fh) {
+ print STDERR "unexpected end of file\n";
+ return $data;
+ }
+
+ # check type of thing
+ if (/^\"(.*)/s) {
+ $_ = $1;
+ $bracelevel = 0;
+ $line = $.;
+ while (1) {
+ if (!$bracelevel && /^([^{}\"]*)\"(.*)/s) {
+ $data .= $1;
+ $_ = $2;
+ last;
+ } elsif ($bracelevel && /^([^{}]*\})(.*)/s) {
+ $data .= $1;
+ $_ = $2;
+ $bracelevel--;
+ } elsif (/^([^{}]*\{)(.*)/s) {
+ $data .= $1;
+ $_ = $2;
+ $bracelevel++;
+ } else {
+ $data .= $_;
+ die "end of file within quotes started at line $line" if eof $fh;
+ $_ = <$fh>;
+ }
+ }
+
+ } elsif (/^\{(.*)/s) {
+ $_ = $1;
+ $bracelevel = 1;
+ $line = $.;
+ while ($bracelevel) {
+ if (/^([^{}]*)\}(.*)/s) {
+ $data .= $1;
+ $data .= "}" if $bracelevel > 1;
+ $_ = $2;
+ $bracelevel--;
+ } elsif (/^([^{}]*\{)(.*)/s) {
+ $data .= $1;
+ $_ = $2;
+ $bracelevel++;
+ } else {
+ $data .= $_;
+ die "end of file within braces started at line $line" if eof $fh;
+ $_ = <$fh>;
+ }
+ }
+
+ } elsif (/^\#/) {
+ # do nothing
+ print STDERR "warning: odd concatenation at line $.\n";
+ } elsif (/^[\},]/) {
+ print STDERR "no data after field at line $.\n" if $data eq '';
+ return $data;
+ } elsif (/^([^\s\},]+)(.*)/s) {
+ if ($strings->{lc($1)}) {
+ $data .= $strings->{lc($1)};
+ } else {
+ $data .= $1;
+ }
+ $_ = $2;
+ }
+
+ # got a single string, check for concatenation
+ $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh);
+ s/^\s+//;
+ if (/^\#(.*)/s) {
+ $_ = $1;
+ } else {
+ return $data;
+ }
+ }
+}
+
+sub parse_bibtex_entry ($$$$) {
+ # uses caller's $_
+ my($fh, $name, $strings, $entries) = @_;
+ my($entryline) = $.;
+
+ $_ = <$fh> while /^\s+$/ && !eof $fh;
+ if (/^\s*\{(.*)/s) {
+ $_ = $1;
+ } else {
+ print STDERR "no open brace after \@$name starting at line $entryline\n";
+ return [];
+ }
+
+ # get prototype
+ my($prototype) = $bibtex_prototypes{$name};
+ $prototype = $bibtex_prototypes{'_'} if !defined $prototype;
+
+ # parse entry into `@v'
+ my(@v, $a, $b);
+ while (!eof $fh) {
+ $_ = <$fh> while /^\s*$/ && !eof $fh;
+ if (/^\s*\}(.*)/s) {
+ $_ = $1;
+ last;
+ } elsif ($prototype =~ /^k/) {
+ push @v, parse_bibtex_key($fh);
+ } elsif ($prototype =~ /^v/) {
+ push @v, parse_bibtex_value($fh, $strings);
+ } elsif ($prototype =~ /^p/) {
+ push @v, parse_bibtex_key($fh);
+ $_ = <$fh> while /^\s+$/ && !eof $fh;
+ s/^\s+\=?//;
+ push @v, parse_bibtex_value($fh, $strings);
+ }
+ $_ = <$fh> while /^\s*$/ && !eof $fh;
+ s/^\s*,?//;
+ $prototype = substr($prototype, 1)
+ if $prototype && $prototype !~ /^.\*/;
+ }
+ print STDERR "missing args to \@$name at line $.\n"
+ if $prototype && $prototype !~ /^.\*/;
+
+ # do something with entry
+ if ($name eq 'string') {
+ $strings->{$v[0]} = $v[1];
+ } elsif ($name eq 'preamble') {
+ # do nothing
+ } else {
+ my($key) = shift @v;
+ $entries->{$key} = {@v};
+ $entries->{$key}->{'_type'} = $name;
+ $entries->{$key}->{'_key'} = $key;
+ push @{$entries->{'_'}}, $key;
+ }
+}
+
+sub parse (*;\%) {
+ my($fh) = qualify_to_ref(shift, caller);
+ my($initial_strings) = @_;
+ my($strings) = $initial_strings;
+
+ my($curname, $garbage, %entries);
+ local($_) = '';
+ while (<$fh>) {
+
+ if (/^\s*[%\#]/ || /^\s*$/) {
+ # comment
+
+ } elsif (/^\s*\@([^\s\"\#%\'(),={}]+)(.*)/s) {
+ $curname = lc($1);
+ $_ = $2;
+ parse_bibtex_entry($fh, $curname, $strings, \%entries);
+
+ } else {
+ print STDERR "garbage at line $.\n" if !defined $garbage;
+ $garbage = 1;
+ }
+ }
+
+ \%entries;
+}
+
+sub expand ($$) {
+ my($e, $key) = @_;
+ my(%d) = %{$e->{$key}};
+ while ($d{'crossref'}) {
+ my($v) = $d{'crossref'};
+ delete $d{'crossref'};
+ %d = (%{$e->{$v}}, %d);
+ }
+ \%d;
+}
+
+
+sub split_von ($$$@) {
+ my($f, $v, $l, @x) = @_;
+ my(@pre, $t, $in_von, $tt);
+ while (@x) {
+ $t = $tt = shift @x;
+ if ($tt =~ /^\{\\/) {
+ $tt =~ s/\\[A-Za-z@]+//g;
+ $tt =~ s/\\.//g;
+ $tt =~ tr/{}//d;
+ }
+ if ($tt =~ /^[a-z]/) {
+ push @$v, $t;
+ $in_von = 1;
+ } elsif ($in_von || !ref($f)) {
+ push @$l, $t, @x;
+ return;
+ } else {
+ push @$f, $t;
+ }
+ }
+ if (!$in_von) {
+ push @$l, (pop @$f);
+ }
+}
+
+sub parse_author ($) {
+ local($_) = @_[0];
+ my(@x) = ();
+ my($pos, $pos0, $t, $bracelevel);
+
+ # move text into @x
+ while (!/^\s*$/) {
+ s/^\s+//;
+ $pos = 0;
+ while ($pos < length) {
+ $t = substr($_, $pos, 1);
+ if ($t eq '{') {
+ $bracelevel++;
+ } elsif ($t eq '}') {
+ $bracelevel--;
+ } elsif ($bracelevel <= 0) {
+ last if ($t =~ /[\s,]/);
+ }
+ $pos++;
+ }
+
+ push @x, substr($_, 0, $pos);
+ if ($t eq ',') {
+ push @x, ',';
+ $pos++;
+ }
+ $_ = substr($_, $pos);
+ }
+
+ # split @x into arrays based on `and'
+ my(@aa) = ([]);
+ foreach $t (@x) {
+ if ($t eq 'and') {
+ push @aa, [] if @{$aa[-1]} > 0;
+ } else {
+ push @{$aa[-1]}, $t;
+ }
+ }
+
+ # massage each subarray into four parts: first, von, last, jr
+ my(@aaa) = ();
+ foreach $t (@aa) {
+ my(@fvl, @vl, @v, @l, @f, @j, $cur, $commas);
+ $cur = \@fvl; $commas = 0;
+
+ # split into subarrays if possible
+ foreach $x (@$t) {
+ if ($x eq ',') {
+ if ($commas == 0) {
+ @vl = @fvl;
+ @fvl = ();
+ $cur = \@f;
+ } else {
+ push @j, @f;
+ @f = ();
+ }
+ $commas++;
+ } else {
+ push @$cur, $x;
+ }
+ }
+
+ # split out the `von' part
+ if ($commas == 0) {
+ split_von(\@f, \@v, \@l, @fvl);
+ } else {
+ split_von(0, \@v, \@l, @vl);
+ }
+
+ # store as an array of arrays
+ push @aaa, [[@f], [@v], [@l], [@j]];
+ }
+
+ @aaa;
+}
+
+1;