diff options
| author | Nick Mathewson <nickm@torproject.org> | 2003-05-17 06:18:24 +0000 |
|---|---|---|
| committer | Nick Mathewson <nickm@torproject.org> | 2003-05-17 06:18:24 +0000 |
| commit | 0d6150b63cd5a698e647a4809e65a262f55902cb (patch) | |
| tree | c2269f85b90faf0585b6ea245b119d75d613ad5e /perl-v2/BibTeX.pm | |
| parent | 1d07a97300d48872726edd989f53bf489dc00a41 (diff) | |
| download | anonbib-0d6150b63cd5a698e647a4809e65a262f55902cb.tar.gz | |
Adding second version of perl files, css support, first take on anon bib
svn:r6
Diffstat (limited to 'perl-v2/BibTeX.pm')
| -rw-r--r-- | perl-v2/BibTeX.pm | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/perl-v2/BibTeX.pm b/perl-v2/BibTeX.pm new file mode 100644 index 0000000..17bc392 --- /dev/null +++ b/perl-v2/BibTeX.pm @@ -0,0 +1,303 @@ +package BibTeX; +use Symbol 'qualify_to_ref'; + +%bibtex_prototypes = ('string' => 'p', 'preamble' => 'v', '_' => 'kp*'); + +sub parse_bibtex_key ($) { + my($fh) = @_; + $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh); + if (/^\s*([^"#%'(),={}\s]+)(.*)/s) { + $_ = $2; + lc($1); + } else { + print STDERR "no key at line $.\n"; + ""; + } +} + +sub parse_bibtex_value ($$) { + my($fh, $strings) = @_; + my($data) = ""; + my($bracelevel, $line); + + # loop over concatenation + while (1) { + + # loop over lines + $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh); + s/^\s+//; + if (eof $fh) { + print STDERR "unexpected end of file\n"; + return $data; + } + + # check type of thing + if (/^\"(.*)/s) { + $_ = $1; + $bracelevel = 0; + $line = $.; + while (1) { + if (!$bracelevel && /^([^{}\"]*)\"(.*)/s) { + $data .= $1; + $_ = $2; + last; + } elsif ($bracelevel && /^([^{}]*\})(.*)/s) { + $data .= $1; + $_ = $2; + $bracelevel--; + } elsif (/^([^{}]*\{)(.*)/s) { + $data .= $1; + $_ = $2; + $bracelevel++; + } else { + $data .= $_; + die "end of file within quotes started at line $line" if eof $fh; + $_ = <$fh>; + } + } + + } elsif (/^\{(.*)/s) { + $_ = $1; + $bracelevel = 1; + $line = $.; + while ($bracelevel) { + if (/^([^{}]*)\}(.*)/s) { + $data .= $1; + $data .= "}" if $bracelevel > 1; + $_ = $2; + $bracelevel--; + } elsif (/^([^{}]*\{)(.*)/s) { + $data .= $1; + $_ = $2; + $bracelevel++; + } else { + $data .= $_; + die "end of file within braces started at line $line" if eof $fh; + $_ = <$fh>; + } + } + + } elsif (/^\#/) { + # do nothing + print STDERR "warning: odd concatenation at line $.\n"; + } elsif (/^[\},]/) { + print STDERR "no data after field at line $.\n" if $data eq ''; + return $data; + } elsif (/^([^\s\},]+)(.*)/s) { + if ($strings->{lc($1)}) { + $data .= $strings->{lc($1)}; + } else { + $data .= $1; + } + $_ = $2; + } + + # got a single string, check for concatenation + $_ = <$fh> while ((/^\s+$/s || /^\s+%/) && !eof $fh); + s/^\s+//; + if (/^\#(.*)/s) { + $_ = $1; + } else { + return $data; + } + } +} + +sub parse_bibtex_entry ($$$$) { + # uses caller's $_ + my($fh, $name, $strings, $entries) = @_; + my($entryline) = $.; + + $_ = <$fh> while /^\s+$/ && !eof $fh; + if (/^\s*\{(.*)/s) { + $_ = $1; + } else { + print STDERR "no open brace after \@$name starting at line $entryline\n"; + return []; + } + + # get prototype + my($prototype) = $bibtex_prototypes{$name}; + $prototype = $bibtex_prototypes{'_'} if !defined $prototype; + + # parse entry into `@v' + my(@v, $a, $b); + while (!eof $fh) { + $_ = <$fh> while /^\s*$/ && !eof $fh; + if (/^\s*\}(.*)/s) { + $_ = $1; + last; + } elsif ($prototype =~ /^k/) { + push @v, parse_bibtex_key($fh); + } elsif ($prototype =~ /^v/) { + push @v, parse_bibtex_value($fh, $strings); + } elsif ($prototype =~ /^p/) { + push @v, parse_bibtex_key($fh); + $_ = <$fh> while /^\s+$/ && !eof $fh; + s/^\s+\=?//; + push @v, parse_bibtex_value($fh, $strings); + } + $_ = <$fh> while /^\s*$/ && !eof $fh; + s/^\s*,?//; + $prototype = substr($prototype, 1) + if $prototype && $prototype !~ /^.\*/; + } + print STDERR "missing args to \@$name at line $.\n" + if $prototype && $prototype !~ /^.\*/; + + # do something with entry + if ($name eq 'string') { + $strings->{$v[0]} = $v[1]; + } elsif ($name eq 'preamble') { + # do nothing + } else { + my($key) = shift @v; + $entries->{$key} = {@v}; + $entries->{$key}->{'_type'} = $name; + $entries->{$key}->{'_key'} = $key; + push @{$entries->{'_'}}, $key; + } +} + +sub parse (*;\%) { + my($fh) = qualify_to_ref(shift, caller); + my($initial_strings) = @_; + my($strings) = $initial_strings; + + my($curname, $garbage, %entries); + local($_) = ''; + while (<$fh>) { + + if (/^\s*[%\#]/ || /^\s*$/) { + # comment + + } elsif (/^\s*\@([^\s\"\#%\'(),={}]+)(.*)/s) { + $curname = lc($1); + $_ = $2; + parse_bibtex_entry($fh, $curname, $strings, \%entries); + + } else { + print STDERR "garbage at line $.\n" if !defined $garbage; + $garbage = 1; + } + } + + \%entries; +} + +sub expand ($$) { + my($e, $key) = @_; + my(%d) = %{$e->{$key}}; + while ($d{'crossref'}) { + my($v) = $d{'crossref'}; + delete $d{'crossref'}; + %d = (%{$e->{$v}}, %d); + } + \%d; +} + + +sub split_von ($$$@) { + my($f, $v, $l, @x) = @_; + my(@pre, $t, $in_von, $tt); + while (@x) { + $t = $tt = shift @x; + if ($tt =~ /^\{\\/) { + $tt =~ s/\\[A-Za-z@]+//g; + $tt =~ s/\\.//g; + $tt =~ tr/{}//d; + } + if ($tt =~ /^[a-z]/) { + push @$v, $t; + $in_von = 1; + } elsif ($in_von || !ref($f)) { + push @$l, $t, @x; + return; + } else { + push @$f, $t; + } + } + if (!$in_von) { + push @$l, (pop @$f); + } +} + +sub parse_author ($) { + local($_) = $_[0]; + my(@x) = (); + my($pos, $pos0, $t, $bracelevel); + + $bracelevel = 0; + + # move text into @x + while (!/^\s*$/) { + s/^\s+//; + $pos = 0; + while ($pos < length) { + $t = substr($_, $pos, 1); + if ($t eq '{') { + $bracelevel++; + } elsif ($t eq '}') { + $bracelevel--; + } elsif ($bracelevel <= 0) { + last if ($t =~ /[\s,]/); + } + $pos++; + } + + push @x, substr($_, 0, $pos); + if ($t eq ',') { + push @x, ','; + $pos++; + } + $_ = substr($_, $pos); + } + + # split @x into arrays based on `and' + my(@aa) = ([]); + foreach $t (@x) { + if ($t eq 'and') { + push @aa, [] if @{$aa[-1]} > 0; + } else { + push @{$aa[-1]}, $t; + } + } + + # massage each subarray into four parts: first, von, last, jr + my(@aaa) = (); + foreach $t (@aa) { + my(@fvl, @vl, @v, @l, @f, @j, $cur, $commas); + $cur = \@fvl; $commas = 0; + + # split into subarrays if possible + foreach $x (@$t) { + if ($x eq ',') { + if ($commas == 0) { + @vl = @fvl; + @fvl = (); + $cur = \@f; + } else { + push @j, @f; + @f = (); + } + $commas++; + } else { + push @$cur, $x; + } + } + + # split out the `von' part + if ($commas == 0) { + split_von(\@f, \@v, \@l, @fvl); + } else { + split_von(0, \@v, \@l, @vl); + } + + # store as an array of arrays + push @aaa, [[@f], [@v], [@l], [@j]]; + } + + @aaa; +} + +1; |
