This commit is contained in:
2024-10-14 00:08:40 +02:00
parent dbfba56f66
commit 1462d52e13
4572 changed files with 2658864 additions and 0 deletions

View File

@@ -0,0 +1,141 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Array;
use base 'PDF::API2::Basic::PDF::Objind';
use strict;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::Array - Corresponds to a PDF array. Inherits from L<PDF::Objind>
=head1 METHODS
=head2 PDF::Array->new($parent, @vals)
Creates an array with the given storage parent and an optional list of values to
initialise the array with.
=cut
sub new {
my ($class, @vals) = @_;
my $self = {};
$self->{' val'} = [@vals];
$self->{' realised'} = 1;
bless $self, $class;
return $self;
}
=head2 $a->outobjdeep($fh, $pdf)
Outputs an array as a PDF array to the given filehandle.
=cut
sub outobjdeep {
my ($self, $fh, $pdf, %opts) = @_;
$fh->print('[ ');
foreach my $obj (@{$self->{' val'}}) {
$obj->outobj($fh, $pdf);
$fh->print(' ');
}
$fh->print(']');
}
=head2 $a->removeobj($elem)
Removes all occurrences of an element from an array.
=cut
sub removeobj {
my ($self, $elem) = @_;
$self->{' val'} = [grep($_ ne $elem, @{$self->{' val'}})];
}
=head2 $a->elementsof
Returns a list of all the elements in the array. Notice that this is
not the array itself but the elements in the array.
Also available as C<elements>.
=cut
sub elementsof {
return wantarray ? @{$_[0]->{' val'}} : scalar @{$_[0]->{' val'}};
}
sub elements {
my $self = shift();
return @{$self->{' val'}};
}
=head2 $a->add_elements
Appends the given elements to the array. An element is only added if it
is defined.
=cut
sub add_elements {
my $self = shift();
foreach my $e (@_) {
push @{$self->{' val'}}, $e if defined $e;
}
return $self;
}
=head2 $a->val
Returns the value of the array, this is a reference to the actual array
containing the elements.
=cut
sub val {
return $_[0]->{' val'};
}
=head2 $a->copy($pdf)
Copies the array with deep-copy on elements which are not full PDF objects
with respect to a particular $pdf output context
=cut
sub copy {
my ($self, $pdf) = @_;
my $res = $self->SUPER::copy($pdf);
$res->{' val'} = [];
foreach my $e (@{$self->{' val'}}) {
if (ref($e) and $e->can('is_obj') and not $e->is_obj($pdf)) {
push(@{$res->{' val'}}, $e->copy($pdf));
}
else {
push(@{$res->{' val'}}, $e);
}
}
return $res;
}
1;

View File

@@ -0,0 +1,48 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Bool;
use base 'PDF::API2::Basic::PDF::String';
use strict;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::Bool - A special form of L<PDF::String> which holds the strings
B<true> or B<false>
=head1 METHODS
=head2 $b->convert($str)
Converts a string into the string which will be stored.
=cut
sub convert {
return $_[1] eq 'true';
}
=head2 as_pdf
Converts the value to a PDF output form
=cut
sub as_pdf {
return $_[0]->{'val'} ? 'true' : 'false';
}
1;

View File

@@ -0,0 +1,329 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Dict;
use base 'PDF::API2::Basic::PDF::Objind';
use strict;
no warnings qw[ deprecated recursion uninitialized ];
our $VERSION = '2.033'; # VERSION
our $mincache = 16 * 1024 * 1024;
use File::Temp;
use PDF::API2::Basic::PDF::Array;
use PDF::API2::Basic::PDF::Filter;
use PDF::API2::Basic::PDF::Name;
=head1 NAME
PDF::API2::Basic::PDF::Dict - PDF Dictionaries and Streams. Inherits from L<PDF::Objind>
=head1 INSTANCE VARIABLES
There are various special instance variables which are used to look after,
particularly, streams. Each begins with a space:
=over
=item stream
Holds the stream contents for output
=item streamfile
Holds the stream contents in an external file rather than in memory. This is
not the same as a PDF file stream. The data is stored in its unfiltered form.
=item streamloc
If both ' stream' and ' streamfile' are empty, this indicates where in the
source PDF the stream starts.
=back
=head1 METHODS
=cut
sub new {
my ($class) = @_;
$class = ref($class) if ref($class);
my $self = $class->SUPER::new(@_);
$self->{' realised'} = 1;
return $self;
}
=head2 $type = $d->type($type)
Get/Set the standard Type key. It can be passed, and will return, a text value rather than a Name object.
=cut
sub type {
my $self = shift();
if (scalar @_) {
my $type = shift();
$self->{'Type'} = ref($type) ? $type : PDF::API2::Basic::PDF::Name->new($type);
}
return unless exists $self->{'Type'};
return $self->{'Type'}->val();
}
=head2 @filters = $d->filter(@filters)
Get/Set one or more filters being used by the optional stream attached to the dictionary.
=cut
sub filter {
my ($self, @filters) = @_;
# Developer's Note: the PDF specification allows Filter to be
# either a name or an array, but other parts of this codebase
# expect an array. If these are updated uncomment the
# commented-out lines in order to accept both types.
# if (scalar @filters == 1) {
# $self->{'Filter'} = ref($filters[0]) ? $filters[0] : PDF::API2::Basic::PDF::Name->new($filters[0]);
# }
# elsif (scalar @filters) {
@filters = map { ref($_) ? $_ : PDF::API2::Basic::PDF::Name->new($_) } @filters;
$self->{'Filter'} = PDF::API2::Basic::PDF::Array->new(@filters);
# }
}
# Undocumented alias, which may be removed in a future release
sub filters { return filter(@_); }
=head2 $d->outobjdeep($fh)
Outputs the contents of the dictionary to a PDF file. This is a recursive call.
It also outputs a stream if the dictionary has a stream element. If this occurs
then this method will calculate the length of the stream and insert it into the
stream's dictionary.
=cut
sub outobjdeep {
my ($self, $fh, $pdf, %opts) = @_;
if (defined $self->{' stream'} or defined $self->{' streamfile'} or defined $self->{' streamloc'}) {
if ($self->{'Filter'} and $self->{' nofilt'}) {
$self->{'Length'} ||= PDF::API2::Basic::PDF::Number->new(length($self->{' stream'}));
}
elsif ($self->{'Filter'} or not defined $self->{' stream'}) {
$self->{'Length'} = PDF::API2::Basic::PDF::Number->new(0) unless defined $self->{'Length'};
$pdf->new_obj($self->{'Length'}) unless $self->{'Length'}->is_obj($pdf);
}
else {
$self->{'Length'} = PDF::API2::Basic::PDF::Number->new(length($self->{' stream'}));
## $self->{'Length'} = PDF::API2::Basic::PDF::Number->new(length($self->{' stream'}) + 1);
## this old code seams to burp acro6, lets see what breaks next -- fredo
}
}
$fh->print('<< ');
foreach my $key (sort {
$a eq 'Type' ? -1 : $b eq 'Type' ? 1 :
$a eq 'Subtype' ? -1 : $b eq 'Subtype' ? 1 : $a cmp $b
} keys %$self) {
next if $key =~ m/^[\s\-]/o;
next unless $self->{$key};
$fh->print('/' . PDF::API2::Basic::PDF::Name::string_to_name($key, $pdf) . ' ');
$self->{$key}->outobj($fh, $pdf, %opts);
$fh->print(' ');
}
$fh->print('>>');
# Now handle the stream (if any)
my (@filters, $loc);
if (defined $self->{' streamloc'} and not defined $self->{' stream'}) {
# read a stream if in file
$loc = $fh->tell();
$self->read_stream();
$fh->seek($loc, 0);
}
if (not $self->{' nofilt'} and defined $self->{'Filter'} and (defined $self->{' stream'} or defined $self->{' streamfile'})) {
my $hasflate = -1;
for my $i (0 .. scalar(@{$self->{'Filter'}{' val'}}) - 1) {
my $filter = $self->{'Filter'}{' val'}[$i]->val();
# hack to get around LZW patent
if ($filter eq 'LZWDecode') {
if ($hasflate < -1) {
$hasflate = $i;
next;
}
$filter = 'FlateDecode';
$self->{'Filter'}{' val'}[$i]{'val'} = $filter; # !!!
}
elsif ($filter eq 'FlateDecode') {
$hasflate = -2;
}
my $filter_class = "PDF::API2::Basic::PDF::Filter::$filter";
push (@filters, $filter_class->new());
}
splice(@{$self->{'Filter'}{' val'}}, $hasflate, 1) if $hasflate > -1;
}
if (defined $self->{' stream'}) {
$fh->print(" stream\n");
$loc = $fh->tell();
my $stream = $self->{' stream'};
unless ($self->{' nofilt'}) {
foreach my $filter (reverse @filters) {
$stream = $filter->outfilt($stream, 1);
}
}
$fh->print($stream);
## $fh->print("\n"); # newline goes into endstream
}
elsif (defined $self->{' streamfile'}) {
open(my $dictfh, "<", $self->{' streamfile'}) || die "Unable to open $self->{' streamfile'}";
binmode($dictfh, ':raw');
$fh->print(" stream\n");
$loc = $fh->tell();
my $stream;
while (read($dictfh, $stream, 4096)) {
unless ($self->{' nofilt'}) {
foreach my $filter (reverse @filters) {
$stream = $filter->outfilt($stream, 0);
}
}
$fh->print($stream);
}
close $dictfh;
unless ($self->{' nofilt'}) {
$stream = '';
foreach my $filter (reverse @filters) {
$stream = $filter->outfilt($stream, 1);
}
$fh->print($stream);
}
## $fh->print("\n"); # newline goes into endstream
}
if (defined $self->{' stream'} or defined $self->{' streamfile'}) {
my $length = $fh->tell() - $loc;
unless ($self->{'Length'}{'val'} == $length) {
$self->{'Length'}{'val'} = $length;
$pdf->out_obj($self->{'Length'}) if $self->{'Length'}->is_obj($pdf);
}
$fh->print("\nendstream"); # next is endobj which has the final cr
}
}
=head2 $d->read_stream($force_memory)
Reads in a stream from a PDF file. If the stream is greater than
C<PDF::Dict::mincache> (defaults to 32768) bytes to be stored, then
the default action is to create a file for it somewhere and to use that
file as a data cache. If $force_memory is set, this caching will not
occur and the data will all be stored in the $self->{' stream'}
variable.
=cut
sub read_stream {
my ($self, $force_memory) = @_;
my $fh = $self->{' streamsrc'};
my $len = $self->{'Length'}->val();
$self->{' stream'} = '';
my @filters;
if (defined $self->{'Filter'}) {
my $i = 0;
foreach my $filter ($self->{'Filter'}->elementsof()) {
my $filter_class = "PDF::API2::Basic::PDF::Filter::" . $filter->val();
unless ($self->{'DecodeParms'}) {
push(@filters, $filter_class->new());
}
elsif ($self->{'Filter'}->isa('PDF::API2::Basic::PDF::Name') and $self->{'DecodeParms'}->isa('PDF::API2::Basic::PDF::Dict')) {
push(@filters, $filter_class->new($self->{'DecodeParms'}));
}
elsif ($self->{'DecodeParms'}->isa('PDF::API2::Basic::PDF::Array')) {
my $parms = $self->{'DecodeParms'}->val->[$i];
push(@filters, $filter_class->new($parms));
}
else {
push(@filters, $filter_class->new());
}
$i++;
}
}
my $last = 0;
if (defined $self->{' streamfile'}) {
unlink ($self->{' streamfile'});
$self->{' streamfile'} = undef;
}
seek $fh, $self->{' streamloc'}, 0;
my $dictfh;
my $readlen = 4096;
for (my $i = 0; $i < $len; $i += $readlen) {
my $data;
unless ($i + $readlen > $len) {
read $fh, $data, $readlen;
}
else {
$last = 1;
read $fh, $data, $len - $i;
}
foreach my $filter (@filters) {
$data = $filter->infilt($data, $last);
}
# Start using a temporary file if the stream gets too big
if (not $force_memory and not defined $self->{' streamfile'} and (length($self->{' stream'}) + length($data)) > $mincache) {
$dictfh = File::Temp->new(TEMPLATE => 'pdfXXXXX', SUFFIX => 'dat', TMPDIR => 1);
$self->{' streamfile'} = $dictfh->filename();
print $dictfh $self->{' stream'};
undef $self->{' stream'};
}
if (defined $self->{' streamfile'}) {
print $dictfh $data;
}
else {
$self->{' stream'} .= $data;
}
}
close $dictfh if defined $self->{' streamfile'};
$self->{' nofilt'} = 0;
return $self;
}
=head2 $d->val
Returns the dictionary, which is itself.
=cut
sub val {
return $_[0];
}
1;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,114 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Filter;
use strict;
use warnings;
our $VERSION = '2.033'; # VERSION
use PDF::API2::Basic::PDF::Filter::ASCII85Decode;
use PDF::API2::Basic::PDF::Filter::ASCIIHexDecode;
use PDF::API2::Basic::PDF::Filter::FlateDecode;
use PDF::API2::Basic::PDF::Filter::LZWDecode;
use PDF::API2::Basic::PDF::Filter::RunLengthDecode;
use Scalar::Util qw(blessed reftype);
=head1 NAME
PDF::API2::Basic::PDF::Filter - Abstract superclass for PDF stream filters
=head1 SYNOPSIS
$f = PDF::API2::Basic::PDF::Filter->new;
$str = $f->outfilt($str, 1);
print OUTFILE $str;
while (read(INFILE, $dat, 4096))
{ $store .= $f->infilt($dat, 0); }
$store .= $f->infilt("", 1);
=head1 DESCRIPTION
A Filter object contains state information for the process of outputting
and inputting data through the filter. The precise state information stored
is up to the particular filter and may range from nothing to whole objects
created and destroyed.
Each filter stores different state information for input and output and thus
may handle one input filtering process and one output filtering process at
the same time.
=head1 METHODS
=head2 PDF::API2::Basic::PDF::Filter->new
Creates a new filter object with empty state information ready for processing
data both input and output.
=head2 $dat = $f->infilt($str, $isend)
Filters from output to input the data. Notice that $isend == 0 implies that there
is more data to come and so following it $f may contain state information
(usually due to the break-off point of $str not being tidy). Subsequent calls
will incorporate this stored state information.
$isend == 1 implies that there is no more data to follow. The
final state of $f will be that the state information is empty. Error messages
are most likely to occur here since if there is required state information to
be stored following this data, then that would imply an error in the data.
=head2 $str = $f->outfilt($dat, $isend)
Filter stored data ready for output. Parallels C<infilt>.
=cut
sub new {
my $class = shift();
my $self = {};
bless $self, $class;
return $self;
}
sub release {
my $self = shift();
return $self unless ref($self);
# delete stuff that we know we can, here
my @tofree = map { delete $self->{$_} } keys %$self;
while (my $item = shift @tofree) {
my $ref = ref($item);
if (blessed($item) and $item->can('release')) {
$item->release();
}
elsif ($ref eq 'ARRAY') {
push @tofree, @$item;
}
elsif (defined(reftype($ref)) and reftype($ref) eq 'HASH') {
release($item);
}
}
# check that everything has gone
foreach my $key (keys %$self) {
# warn ref($self) . " still has '$key' key left after release.\n";
$self->{$key} = undef;
delete $self->{$key};
}
}
1;

View File

@@ -0,0 +1,88 @@
package PDF::API2::Basic::PDF::Filter::ASCII85Decode;
use base 'PDF::API2::Basic::PDF::Filter';
use strict;
use warnings;
our $VERSION = '2.033'; # VERSION
sub outfilt {
my ($self, $str, $isend) = @_;
my ($res, $i, $j, $b, @c);
if (exists $self->{'outcache'} and $self->{'outcache'} ne "") {
$str = $self->{'outcache'} . $str;
$self->{'outcache'} = "";
}
for ($i = 0; $i + 4 <= length($str); $i += 4) {
$b = unpack("N", substr($str, $i, 4));
if ($b == 0) {
$res .= "z";
next;
}
for ($j = 0; $j < 4; $j++) {
$c[$j] = $b - int($b / 85) * 85 + 33; $b /= 85;
}
$res .= pack("C5", $b + 33, reverse @c);
$res .= "\n" if ($i % 60 == 56);
}
if ($isend && $i < length($str)) {
$str = substr($str, $i);
$b = unpack("N", $str . ("\000" x (4 - length($str))));
for ($j = 0; $j < 4; $j++) {
$c[$j] = $b - int($b / 85) * 85 + 33; $b /= 85;
}
push @c, $b + 33;
$res .= substr(pack("C5", reverse @c), 0, length($str) + 1) . '~>';
}
elsif ($isend) {
$res .= '~>';
}
elsif ($i + 4 > length($str)) {
$self->{'outcache'} = substr($str, $i);
}
return $res;
}
sub infilt {
my ($self, $str, $isend) = @_;
my ($res, $i, $j, @c, $b, $num);
$num = 0;
if (exists($self->{'incache'}) && $self->{'incache'} ne "") {
$str = $self->{'incache'} . $str;
$self->{'incache'} = "";
}
$str =~ s/(\r|\n)\n?//og;
for ($i = 0; $i < length($str); $i += 5) {
last if $isend and substr($str, $i, 6) eq '~>';
$b = 0;
if (substr($str, $i, 1) eq "z") {
$i -= 4;
$res .= pack("N", 0);
next;
}
elsif ($isend && substr($str, $i, 6) =~ m/^(.{2,4})\~\>$/o) {
$num = 5 - length($1);
@c = unpack("C5", $1 . ("u" x (4 - $num))); # pad with 84 to sort out rounding
$i = length($str);
}
else {
@c = unpack("C5", substr($str, $i, 5));
}
for ($j = 0; $j < 5; $j++) {
$b *= 85;
$b += $c[$j] - 33;
}
$res .= substr(pack("N", $b), 0, 4 - $num);
}
if (!$isend && $i > length($str)) {
$self->{'incache'} = substr($str, $i - 5);
}
return $res;
}
1;

View File

@@ -0,0 +1,58 @@
package PDF::API2::Basic::PDF::Filter::ASCIIHexDecode;
use base 'PDF::API2::Basic::PDF::Filter';
use strict;
use warnings;
our $VERSION = '2.033'; # VERSION
# Maintainer's Note: ASCIIHexDecode is described in the PDF 1.7 spec
# in section 7.4.2.
sub outfilt {
my ($self, $string, $include_eod) = @_;
# Each byte of the input string gets encoded as two hexadecimal
# characters.
$string =~ s/(.)/sprintf('%02x', ord($1))/oge;
# The EOD (end-of-document) marker is a greater-than sign
$string .= '>' if $include_eod;
return $string;
}
sub infilt {
my ($self, $string) = @_;
# "All white-space characters shall be ignored."
$string =~ s/\s//og;
# "A GREATER-THAN SIGN (3Eh) indicates EOD."
my $has_eod_marker = 0;
if (substr($string, -1, 1) eq '>') {
$has_eod_marker = 1;
chop $string;
}
# "Any other characters [than 0-9, A-F, or a-f] shall cause an
# error."
die "Illegal character found in ASCII hex-encoded stream"
if $string =~ /[^0-9A-Fa-f]/;
# "If the filter encounters the EOD marker after reading an odd
# number of hexadecimal digits, it shall behave as if a 0 (zero)
# followed the last digit."
if ($has_eod_marker and length($string) % 2 == 1) {
$string .= '0';
}
# "The ASCIIHexDecode filter shall produce one byte of binary data
# for each pair of ASCII hexadecimal digits."
$string =~ s/([0-9A-Fa-f]{2})/pack("C", hex($1))/oge;
return $string;
}
1;

View File

@@ -0,0 +1,150 @@
package PDF::API2::Basic::PDF::Filter::FlateDecode;
use base 'PDF::API2::Basic::PDF::Filter';
use strict;
no warnings qw[ deprecated recursion uninitialized ];
our $VERSION = '2.033'; # VERSION
use POSIX qw(ceil floor);
our $havezlib;
BEGIN
{
eval { require Compress::Zlib };
$havezlib = !$@;
}
sub new
{
return unless $havezlib;
my ($class, $decode_parms) = @_;
my ($self) = {
DecodeParms => $decode_parms,
};
$self->{'outfilt'} = Compress::Zlib::deflateInit(
-Level=>9,
-Bufsize=>32768,
);
$self->{'infilt'} = Compress::Zlib::inflateInit();
bless $self, $class;
}
sub outfilt
{
my ($self, $str, $isend) = @_;
my ($res);
$res = $self->{'outfilt'}->deflate($str);
$res .= $self->{'outfilt'}->flush() if ($isend);
$res;
}
sub infilt
{
my ($self, $dat, $last) = @_;
my ($res, $status) = $self->{'infilt'}->inflate("$dat");
if ($self->{'DecodeParms'} and $self->{'DecodeParms'}->{'Predictor'}) {
my $predictor = $self->{'DecodeParms'}->{'Predictor'}->val();
if ($predictor == 2) {
die "The TIFF predictor logic has not been implemented";
}
elsif ($predictor >= 10 and $predictor <= 15) {
$res = $self->_depredict_png($res);
}
else {
die "Invalid predictor: $predictor";
}
}
return $res;
}
sub _depredict_png {
my ($self, $stream) = @_;
my $param = $self->{'DecodeParms'};
my $prev = '';
$stream = $self->{'_depredict_next'} . $stream if defined $self->{'_depredict_next'};
$prev = $self->{'_depredict_prev'} if defined $self->{'_depredict_prev'};
my $alpha = $param->{Alpha} ? $param->{Alpha}->val() : 0;
my $bpc = $param->{BitsPerComponent} ? $param->{BitsPerComponent}->val() : 8;
my $colors = $param->{Colors} ? $param->{Colors}->val() : 1;
my $columns = $param->{Columns} ? $param->{Columns}->val() : 1;
my $height = $param->{Height} ? $param->{Height}->val() : 0;
my $comp = $colors + $alpha;
my $bpp = ceil($bpc * $comp / 8);
my $scanline = 1 + ceil($bpp * $columns);
my $clearstream = '';
my $lastrow = ($height || int(length($stream) / $scanline)) - 1;
foreach my $n (0 .. $lastrow) {
# print STDERR "line $n:";
my $line = substr($stream, $n * $scanline, $scanline);
my $filter = vec($line, 0, 8);
my $clear = '';
$line = substr($line, 1);
# print STDERR " filter=$filter ";
if ($filter == 0) {
$clear = $line;
}
elsif ($filter == 1) {
foreach my $x (0 .. length($line) - 1) {
vec($clear, $x, 8) = (vec($line, $x, 8) + vec($clear, $x - $bpp, 8)) % 256;
}
}
elsif ($filter == 2) {
foreach my $x (0 .. length($line) - 1) {
vec($clear, $x, 8) = (vec($line, $x, 8) + vec($prev, $x, 8)) % 256;
}
}
elsif ($filter == 3) {
foreach my $x (0 .. length($line) - 1) {
vec($clear, $x, 8) = (vec($line, $x, 8) + floor((vec($clear, $x - $bpp, 8) + vec($prev, $x, 8)) / 2)) % 256;
}
}
elsif ($filter == 4) {
foreach my $x (0 .. length($line) - 1) {
vec($clear, $x, 8) = (vec($line, $x, 8) + _paeth_predictor(vec($clear, $x - $bpp, 8), vec($prev, $x, 8), vec($prev, $x - $bpp, 8))) % 256;
}
}
else {
die "Unexpected depredictor algorithm $filter requested on line $n (valid options are 0-4)";
}
$prev = $clear;
foreach my $x (0 .. ($columns * $comp) - 1) {
vec($clearstream, ($n * $columns * $comp) + $x, $bpc) = vec($clear, $x, $bpc);
# print STDERR "" . vec($clear, $x, $bpc) . ",";
}
# print STDERR "\n";
}
$self->{'_depredict_next'} = substr($stream, ($lastrow + 1) * $scanline);
$self->{'_depredict_prev'} = $prev;
return $clearstream;
}
sub _paeth_predictor {
my ($a, $b, $c) = @_;
my $p = $a + $b - $c;
my $pa = abs($p - $a);
my $pb = abs($p - $b);
my $pc = abs($p - $c);
if ($pa <= $pb && $pa <= $pc) {
return $a;
}
elsif ($pb <= $pc) {
return $b;
}
else {
return $c;
}
}
1;

View File

@@ -0,0 +1,97 @@
package PDF::API2::Basic::PDF::Filter::LZWDecode;
use base 'PDF::API2::Basic::PDF::Filter::FlateDecode';
use strict;
no warnings qw[ deprecated recursion uninitialized ];
our $VERSION = '2.033'; # VERSION
sub new {
my ($class, $decode_parms) = @_;
my $self = {
DecodeParms => $decode_parms,
};
$self->{'table'} = [map { pack('C', $_) } (0 .. 255, 0, 0)];
$self->{'initial_code_length'} = 9;
$self->{'code_length'} = 9;
$self->{'clear_table'} = 256;
$self->{'eod_marker'} = 257;
$self->{'next_code'} = 258;
bless $self, $class;
return $self;
}
sub infilt {
my ($self, $data, $is_last) = @_;
my ($code, $result);
my $partial_code = $self->{'partial_code'};
my $partial_bits = $self->{'partial_bits'};
my $early_change = 1;
if ($self->{'DecodeParms'} and $self->{'DecodeParms'}->{'EarlyChange'}) {
$early_change = $self->{'DecodeParms'}->{'EarlyChange'}->val();
}
while ($data ne '') {
($code, $partial_code, $partial_bits) = $self->read_dat(\$data, $partial_code, $partial_bits, $self->{'code_length'});
last unless defined $code;
unless ($early_change) {
if ($self->{'next_code'} == (1 << $self->{'code_length'}) and $self->{'code_length'} < 12) {
$self->{'code_length'}++;
}
}
if ($code == $self->{'clear_table'}) {
$self->{'code_length'} = $self->{'initial_code_length'};
$self->{'next_code'} = $self->{'eod_marker'} + 1;
next;
}
elsif ($code == $self->{'eod_marker'}) {
last;
}
elsif ($code > $self->{'eod_marker'}) {
$self->{'table'}[$self->{'next_code'}] = $self->{'table'}[$code];
$self->{'table'}[$self->{'next_code'}] .= substr($self->{'table'}[$code + 1], 0, 1);
$result .= $self->{'table'}[$self->{'next_code'}];
$self->{'next_code'}++;
}
else {
$self->{'table'}[$self->{'next_code'}] = $self->{'table'}[$code];
$result .= $self->{'table'}[$self->{'next_code'}];
$self->{'next_code'}++;
}
if ($early_change) {
if ($self->{'next_code'} == (1 << $self->{'code_length'}) and $self->{'code_length'} < 12) {
$self->{'code_length'}++;
}
}
}
$self->{'partial_code'} = $partial_code;
$self->{'partial_bits'} = $partial_bits;
return $result;
}
sub read_dat {
my ($self, $data_ref, $partial_code, $partial_bits, $code_length) = @_;
$partial_bits = 0 unless defined $partial_bits;
while ($partial_bits < $code_length) {
return (undef, $partial_code, $partial_bits) unless length($$data_ref);
$partial_code = ($partial_code << 8) + unpack('C', $$data_ref);
substr($$data_ref, 0, 1) = '';
$partial_bits += 8;
}
my $code = $partial_code >> ($partial_bits - $code_length);
$partial_code &= (1 << ($partial_bits - $code_length)) - 1;
$partial_bits -= $code_length;
return ($code, $partial_code, $partial_bits);
}
1;

View File

@@ -0,0 +1,109 @@
package PDF::API2::Basic::PDF::Filter::RunLengthDecode;
use base 'PDF::API2::Basic::PDF::Filter';
use strict;
use warnings;
our $VERSION = '2.033'; # VERSION
# Maintainer's Note: RunLengthDecode is described in the PDF 1.7 spec
# in section 7.4.5.
sub outfilt {
my ($self, $input, $include_eod) = @_;
my $output;
while ($input ne '') {
my ($unrepeated, $repeated);
# Look for a repeated character (which can be repeated up to
# 127 times)
if ($input =~ m/^(.*?)((.)\3{1,127})(.*)$/so) {
$unrepeated = $1;
$repeated = $2;
$input = $4;
}
else {
$unrepeated = $input;
$input = '';
}
# Print any non-repeating bytes at the beginning of the input
# in chunks of up to 128 bytes, prefixed with a run-length (0
# to 127, signifying 1 to 128 bytes)
while (length($unrepeated) > 127) {
$output .= pack('C', 127) . substr($unrepeated, 0, 128);
substr($unrepeated, 0, 128) = '';
}
$output .= pack('C', length($unrepeated) - 1) . $unrepeated if length($unrepeated) > 0;
# Then print the number of times the repeated byte was
# repeated (using the formula "257 - length" to give a result
# in the 129-255 range) followed by the byte to be repeated
if (length($repeated)) {
$output .= pack('C', 257 - length($repeated)) . substr($repeated, 0, 1);
}
}
# A byte value of 128 signifies that we're done.
$output .= "\x80" if $include_eod;
return $output;
}
sub infilt {
my ($self, $input, $is_terminated) = @_;
my ($output, $length);
# infilt may be called multiple times, and is expected to continue
# where it left off
if (exists $self->{'incache'}) {
$input = $self->{'incache'} . $input;
delete $self->{'incache'};
}
while (length($input)) {
# Read a length byte
$length = unpack("C", $input);
# A "length" of 128 represents the end of the document
if ($length == 128) {
return $output;
}
# Any other length needs to be followed by at least one other byte
if (length($input) == 1 and not $is_terminated) {
die "Premature end to RunLengthEncoded data";
}
# A length of 129-255 represents a repeated string
# (number of repeats = 257 - length)
if ($length > 128) {
if (length($input) == 1) {
# Out of data. Defer until the next call.
$self->{'incache'} = $input;
return $output;
}
$output .= substr($input, 1, 1) x (257 - $length);
substr($input, 0, 2) = '';
}
# Any other length (under 128) represents a non-repeated
# stream of bytes (with a length of 0 to 127 representing 1 to
# 128 bytes)
else {
if (length($input) < $length + 2) {
# Insufficient data. Defer until the next call.
$self->{'incache'} = $input;
return $output;
}
$output .= substr($input, 1, $length + 1);
substr($input, 0, $length + 2) = '';
}
}
return $output;
}
1;

View File

@@ -0,0 +1,87 @@
# Literal PDF Object for Dirty Hacks ...
package PDF::API2::Basic::PDF::Literal;
use base 'PDF::API2::Basic::PDF::Objind';
use strict;
our $VERSION = '2.033'; # VERSION
use PDF::API2::Basic::PDF::Filter;
use PDF::API2::Basic::PDF::Name;
use Scalar::Util qw(blessed);
no warnings qw[ deprecated recursion uninitialized ];
sub new
{
my ($class, @opts) = @_;
my ($self);
$class = ref $class if ref $class;
$self = $class->SUPER::new(@_);
$self->{' realised'} = 1;
if(scalar @opts > 1) {
$self->{-isdict}=1;
my %opt=@opts;
foreach my $k (keys %opt) {
$self->{$k} = $opt{$k};
}
} elsif(scalar @opts == 1) {
$self->{-literal}=$opts[0];
}
return $self;
}
sub outobjdeep
{
my ($self, $fh, $pdf, %opts) = @_;
if($self->{-isdict})
{
if(defined $self->{' stream'})
{
$self->{Length} = length($self->{' stream'}) + 1;
}
else
{
delete $self->{Length};
}
$fh->print("<< ");
foreach my $k (sort keys %{$self})
{
next if($k=~m|^[ \-]|o);
$fh->print('/'.PDF::API2::Basic::PDF::Name::string_to_name($k).' ');
if(ref($self->{$k}) eq 'ARRAY')
{
$fh->print('['.join(' ',@{$self->{$k}})."]\n");
}
elsif(ref($self->{$k}) eq 'HASH')
{
$fh->print('<<'.join(' ', map { '/'.PDF::API2::Basic::PDF::Name::string_to_name($_).' '.$self->{$k}->{$_} } sort keys %{$self->{$k}})." >>\n");
}
elsif(blessed($self->{$k}) and $self->{$k}->can('outobj'))
{
$self->{$k}->outobj($fh, $pdf, %opts);
$fh->print("\n");
}
else
{
$fh->print("$self->{$k}\n");
}
}
$fh->print(">>\n");
if(defined $self->{' stream'})
{
$fh->print("stream\n$self->{' stream'}\nendstream"); # next is endobj which has the final cr
}
}
else
{
$fh->print($self->{-literal}); # next is endobj which has the final cr
}
}
sub val
{ $_[0]; }
1;

View File

@@ -0,0 +1,116 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Name;
use base 'PDF::API2::Basic::PDF::String';
use strict;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::Name - Inherits from L<PDF::API2::Basic::PDF::String>
and stores PDF names (things beginning with /)
=head1 METHODS
=head2 PDF::API2::Basic::PDF::Name->from_pdf($string)
Creates a new string object (not a full object yet) from a given
string. The string is parsed according to input criteria with
escaping working, particular to Names.
=cut
sub from_pdf {
my ($class, $string, $pdf) = @_;
my ($self) = $class->SUPER::from_pdf($string);
$self->{'val'} = name_to_string($self->{'val'}, $pdf);
return $self;
}
=head2 $n->convert ($string, $pdf)
Converts a name into a string by removing the / and converting any hex
munging.
=cut
sub convert {
my ($self, $string, $pdf) = @_;
$string = name_to_string($string, $pdf);
return $string;
}
=head2 $s->as_pdf ($pdf)
Returns a name formatted as PDF. $pdf is optional but should be the
PDF File object for which the name is intended if supplied.
=cut
sub as_pdf {
my ($self, $pdf) = @_;
my $string = $self->{'val'};
$string = string_to_name($string, $pdf);
return '/' . $string;
}
# Prior to PDF version 1.2, '#' was a literal character. Embedded
# spaces were implicitly allowed in names as well but it would be best
# to ignore that (PDF 1.3, section H.3.2.4.3).
=head2 PDF::API2::Basic::PDF::Name->string_to_name ($string, $pdf)
Suitably encode the string $string for output in the File object $pdf
(the exact format may depend on the version of $pdf).
=cut
sub string_to_name {
my ($string, $pdf) = @_;
# PDF 1.0 and 1.1 didn't treat the # symbol as an escape character
unless ($pdf and $pdf->{' version'} and $pdf->{' version'} < 2) {
$string =~ s|([\x00-\x20\x7f-\xff%()\[\]{}<>#/])|'#' . sprintf('%02X', ord($1))|oge;
}
return $string;
}
=head2 PDF::API2::Basic::PDF::Name->name_to_string ($string, $pdf)
Suitably decode the string $string as read from the File object $pdf
(the exact decoding may depend on the version of $pdf). Principally,
undo the hex encoding for PDF versions > 1.1.
=cut
sub name_to_string {
my ($string, $pdf) = @_;
$string =~ s|^/||o;
# PDF 1.0 and 1.1 didn't treat the # symbol as an escape character
unless ($pdf and $pdf->{' version'} and $pdf->{' version'} < 2) {
$string =~ s/#([0-9a-f]{2})/chr(hex($1))/oige;
}
return $string;
}
1;

View File

@@ -0,0 +1,94 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Null;
use base 'PDF::API2::Basic::PDF::Objind';
use strict;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::Null - PDF Null type object. This is a subclass of
PDF::API2::Basic::PDF::Objind and cannot be subclassed.
=head1 METHODS
=cut
# There is only one null object (section 3.2.8).
my $null_obj = bless {}, 'PDF::API2::Basic::PDF::Null';
=head2 PDF::API2::Basic::PDF::Null->new
Returns the null object. There is only one null object.
=cut
sub new {
return $null_obj;
}
=head2 $s->realise
Pretends to finish reading the object.
=cut
sub realise {
return $null_obj;
}
=head2 $s->outobjdeep
Output the object in PDF format.
=cut
sub outobjdeep {
my ($self, $fh, $pdf) = @_;
$fh->print('null');
}
=head2 $s->is_obj
Returns false because null is not a full object.
=cut
sub is_obj {
return 0;
}
=head2 $s->copy
Another no-op.
=cut
sub copy {
return $null_obj;
}
=head2 $s->val
Return undef.
=cut
sub val {
return undef; ## no critic (undef is intentional)
}
1;

View File

@@ -0,0 +1,47 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Number;
use base 'PDF::API2::Basic::PDF::String';
use strict;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::Number - Numbers in PDF. Inherits from L<PDF::API2::Basic::PDF::String>
=head1 METHODS
=head2 $n->convert($str)
Converts a string from PDF to internal, by doing nothing
=cut
sub convert {
return $_[1];
}
=head2 $n->as_pdf
Converts a number to PDF format
=cut
sub as_pdf {
return $_[0]->{'val'};
}
1;

View File

@@ -0,0 +1,303 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Objind;
use strict;
use warnings;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::Objind - PDF indirect object reference. Also acts as an abstract
superclass for all elements in a PDF file.
=head1 INSTANCE VARIABLES
Instance variables differ from content variables in that they all start with
a space.
=over
=item parent
For an object which is a reference to an object in some source, this holds the
reference to the source object, so that should the reference have to be
de-referenced, then we know where to go and get the info.
=item objnum (R)
The object number in the source (only for object references)
=item objgen (R)
The object generation in the source
There are other instance variables which are used by the parent for file control.
=item isfree
This marks whether the object is in the free list and available for re-use as
another object elsewhere in the file.
=item nextfree
Holds a direct reference to the next free object in the free list.
=back
=head1 METHODS
=cut
use Scalar::Util qw(blessed reftype weaken);
use vars qw($uidc @inst %inst);
$uidc = "pdfuid000";
# protected keys during emptying and copying, etc.
@inst = qw(parent objnum objgen isfree nextfree uid realised);
$inst{" $_"} = 1 for @inst;
=head2 PDF::API2::Basic::PDF::Objind->new()
Creates a new indirect object
=cut
sub new {
my ($class) = @_;
bless {}, ref $class || $class;
}
=head2 uid
Returns a Unique id for this object, creating one if it didn't have one before
=cut
sub uid {
$_[0]->{' uid'} || ($_[0]->{' uid'} = $uidc++);
}
=head2 $r->release
Releases ALL of the memory used by this indirect object, and all of
its component/child objects. This method is called automatically by
'C<PDF::API2::Basic::PDF::File-E<gt>release>' (so you don't have to
call it yourself).
B<Note:> it is important that this method get called at some point
prior to the actual destruction of the object. Internally, PDF files
have an enormous amount of cross-references and this causes circular
references within our own internal data structures. Calling
'C<release()>' forces these circular references to be cleaned up and
the entire internal data structure purged.
=cut
# Maintainer's Question: Couldn't this be handled by a DESTROY method
# instead of requiring an explicit call to release()?
sub release {
my ($self) = @_;
my @tofree = values %$self;
%$self = ();
while (my $item = shift @tofree) {
# common case: value is not reference
my $ref = ref($item) || next;
if (blessed($item) and $item->can('release')) {
$item->release();
}
elsif ($ref eq 'ARRAY') {
push @tofree, @$item;
}
elsif (defined(reftype($ref)) and reftype($ref) eq 'HASH') {
release($item);
}
}
}
=head2 $r->val
Returns the value of this object or reads the object and then returns
its value.
Note that all direct subclasses *must* make their own versions of this
subroutine otherwise we could be in for a very deep loop!
=cut
sub val {
my ($self) = @_;
$self->{' parent'}->read_obj(@_)->val unless $self->{' realised'};
}
=head2 $r->realise
Makes sure that the object is fully read in, etc.
=cut
sub realise {
$_[0]->{' realised'} ? $_[0] : $_[0]->{' objnum'} ? $_[0]->{' parent'}->read_obj(@_) : $_[0];
}
=head2 $r->outobjdeep($fh, $pdf)
If you really want to output this object, then you must need to read it first.
This also means that all direct subclasses must subclass this method or loop forever!
=cut
sub outobjdeep {
my ($self, $fh, $pdf, %opts) = @_;
$self->{' parent'}->read_obj($self)->outobjdeep($fh, $pdf, %opts) unless $self->{' realised'};
}
=head2 $r->outobj($fh)
If this is a full object then outputs a reference to the object, otherwise calls
outobjdeep to output the contents of the object at this point.
=cut
sub outobj {
my ($self, $fh, $pdf, %opts) = @_;
if (defined $pdf->{' objects'}{$self->uid}) {
$fh->printf("%d %d R", @{$pdf->{' objects'}{$self->uid}}[0..1]);
}
else {
$self->outobjdeep($fh, $pdf, %opts);
}
}
=head2 $r->elementsof
Abstract superclass function filler. Returns self here but should return
something more useful if an array.
=cut
sub elementsof {
my ($self) = @_;
if ($self->{' realised'}) {
return $self;
}
else {
return $self->{' parent'}->read_obj($self)->elementsof;
}
}
=head2 $r->empty
Empties all content from this object to free up memory or to be read to pass
the object into the free list. Simplistically undefs all instance variables
other than object number and generation.
=cut
sub empty {
my ($self) = @_;
for my $k (keys %$self) {
undef $self->{$k} unless $inst{$k};
}
return $self;
}
=head2 $r->merge($objind)
This merges content information into an object reference place-holder.
This occurs when an object reference is read before the object definition
and the information in the read data needs to be merged into the object
place-holder
=cut
sub merge {
my ($self, $other) = @_;
for my $k (keys %$other) {
next if $inst{$k};
$self->{$k} = $other->{$k};
# This doesn't seem like the right place to do this, but I haven't
# yet found all of the places where Parent is being set
weaken $self->{$k} if $k eq 'Parent';
}
$self->{' realised'} = 1;
bless $self, ref($other);
}
=head2 $r->is_obj($pdf)
Returns whether this object is a full object with its own object number or
whether it is purely a sub-object. $pdf indicates which output file we are
concerned that the object is an object in.
=cut
sub is_obj {
return defined $_[1]->{' objects'}{$_[0]->uid};
}
=head2 $r->copy($pdf, $res)
Returns a new copy of this object. The object is assumed to be some kind
of associative array and the copy is a deep copy for elements which are
not PDF objects, according to $pdf, and shallow copy for those that are.
Notice that calling C<copy> on an object forces at least a one level
copy even if it is a PDF object. The returned object loses its PDF
object status though.
If $res is defined then the copy goes into that object rather than creating a
new one. It is up to the caller to bless $res, etc. Notice that elements from
$self are not copied into $res if there is already an entry for them existing
in $res.
=cut
sub copy {
my ($self, $pdf, $res) = @_;
unless (defined $res) {
$res = {};
bless $res, ref($self);
}
foreach my $k (keys %$self) {
next if $inst{$k};
next if defined $res->{$k};
if (blessed($self->{$k}) and $self->{$k}->can('is_obj') and not $self->{$k}->is_obj($pdf)) {
$res->{$k} = $self->{$k}->copy($pdf);
}
else {
$res->{$k} = $self->{$k};
}
}
return $res;
}
1;

View File

@@ -0,0 +1,130 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Page;
use base 'PDF::API2::Basic::PDF::Pages';
use strict;
no warnings qw[ deprecated recursion uninitialized ];
our $VERSION = '2.033'; # VERSION
use PDF::API2::Basic::PDF::Dict;
use PDF::API2::Basic::PDF::Utils;
=head1 NAME
PDF::API2::Basic::PDF::Page - Represents a PDF page, inherits from L<PDF::API2::Basic::PDF::Pages>
=head1 DESCRIPTION
Represents a page of output in PDF. It also keeps track of the content stream,
any resources (such as fonts) being switched, etc.
Page inherits from Pages due to a number of shared methods. They are really
structurally quite different.
=head1 INSTANCE VARIABLES
A page has various working variables:
=over
=item curstrm
The currently open stream
=back
=head1 METHODS
=head2 PDF::API2::Basic::PDF::Page->new($pdf, $parent, $index)
Creates a new page based on a pages object (perhaps the root object).
The page is also added to the parent at this point, so pages are ordered in
a PDF document in the order in which they are created rather than in the order
they are closed.
Only the essential elements in the page dictionary are created here, all others
are either optional or can be inherited.
The optional index value indicates the index in the parent list that this page
should be inserted (so that new pages need not be appended)
=cut
sub new
{
my ($class, $pdf, $parent, $index) = @_;
my ($self) = {};
$class = ref $class if ref $class;
$self = $class->SUPER::new($pdf, $parent);
$self->{'Type'} = PDFName('Page');
delete $self->{'Count'};
delete $self->{'Kids'};
$parent->add_page($self, $index);
$self;
}
=head2 $p->add($str)
Adds the string to the currently active stream for this page. If no stream
exists, then one is created and added to the list of streams for this page.
The slightly cryptic name is an aim to keep it short given the number of times
people are likely to have to type it.
=cut
sub add
{
my ($self, $str) = @_;
my ($strm) = $self->{' curstrm'};
if (!defined $strm)
{
$strm = PDF::API2::Basic::PDF::Dict->new;
foreach (@{$self->{' outto'}})
{ $_->new_obj($strm); }
$self->{'Contents'} = PDFArray() unless defined $self->{'Contents'};
unless (ref $self->{'Contents'} eq "PDF::API2::Basic::PDF::Array")
{ $self->{'Contents'} = PDFArray($self->{'Contents'}); }
$self->{'Contents'}->add_elements($strm);
$self->{' curstrm'} = $strm;
}
$strm->{' stream'} .= $str;
$self;
}
=head2 $p->ship_out($pdf)
Ships the page out to the given output file context
=cut
sub ship_out
{
my ($self, $pdf) = @_;
$pdf->ship_out($self);
if (defined $self->{'Contents'})
{ $pdf->ship_out($self->{'Contents'}->elementsof); }
$self;
}
1;

View File

@@ -0,0 +1,435 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Pages;
use strict;
no warnings qw[ deprecated recursion uninitialized ];
use base 'PDF::API2::Basic::PDF::Dict';
our $VERSION = '2.033'; # VERSION
use PDF::API2::Basic::PDF::Array;
use PDF::API2::Basic::PDF::Dict;
use PDF::API2::Basic::PDF::Utils;
use Scalar::Util qw(weaken);
our %inst = map {$_ => 1} qw(Parent Type);
=head1 NAME
PDF::API2::Basic::PDF::Pages - a PDF pages hierarchical element. Inherits from L<PDF::API2::Basic::PDF::Dict>
=head1 DESCRIPTION
A Pages object is the parent to other pages objects or to page objects
themselves.
=head1 METHODS
=head2 PDF::API2::Basic::PDF::Pages->new($pdfs,$parent)
This creates a new Pages object. Notice that $parent here is not the
file context for the object but the parent pages object for this
pages. If we are using this class to create a root node, then $parent
should point to the file context, which is identified by not having a
Type of Pages. $pdfs is the file object (or objects) in which to
create the new Pages object.
=cut
sub new
{
my ($class, $pdfs, $parent) = @_;
my ($self);
$class = ref $class if ref $class;
$self = $class->SUPER::new($pdfs, $parent);
$self->{'Type'} = PDFName("Pages");
$self->{'Parent'} = $parent if defined $parent;
$self->{'Count'} = PDFNum(0);
$self->{'Kids'} = PDF::API2::Basic::PDF::Array->new;
$self->{' outto'} = ref $pdfs eq 'ARRAY' ? $pdfs : [$pdfs];
$self->out_obj(1);
weaken $_ for @{$self->{' outto'}};
weaken $self->{'Parent'} if defined $parent;
$self;
}
sub init
{
my ($self, $pdf) = @_;
$self->{' outto'} = [$pdf];
weaken $self->{' outto'}->[0] if defined $pdf;
$self;
}
=head2 $p->out_obj($isnew)
Tells all the files that this thing is destined for that they should output this
object come time to output. If this object has no parent, then it must be the
root. So set as the root for the files in question and tell it to be output too.
If $isnew is set, then call new_obj rather than out_obj to create as a new
object in the file.
=cut
sub out_obj
{
my ($self, $isnew) = @_;
foreach (@{$self->{' outto'}})
{
if ($isnew)
{ $_->new_obj($self); }
else
{ $_->out_obj($self); }
unless (defined $self->{'Parent'})
{
$_->{'Root'}{'Pages'} = $self;
$_->out_obj($_->{'Root'});
}
}
$self;
}
=head2 $p->find_page($pnum)
Returns the given page, using the page count values in the pages tree. Pages
start at 0.
=cut
sub find_page
{
my ($self, $pnum) = @_;
my ($top) = $self->get_top;
$top->find_page_recurse(\$pnum);
}
sub find_page_recurse
{
my ($self, $rpnum) = @_;
my $res;
if ($self->{'Count'}->realise->val <= $$rpnum)
{
$$rpnum -= $self->{'Count'}->val;
return;
}
foreach my $k ($self->{'Kids'}->realise->elementsof)
{
if ($k->{'Type'}->realise->val eq 'Page')
{
return $k if ($$rpnum == 0);
$$rpnum--;
}
elsif ($res = $k->realise->find_page_recurse($rpnum))
{ return $res; }
}
return;
}
=head2 $p->add_page($page, $pnum)
Inserts the page before the given $pnum. $pnum can be -ve to count from the END
of the document. -1 is after the last page. Likewise $pnum can be greater than the
number of pages currently in the document, to append.
This method only guarantees to provide a reasonable pages tree if pages are
appended or prepended to the document. Pages inserted in the middle of the
document may simply be inserted in the appropriate leaf in the pages tree without
adding any new branches or leaves. To tidy up such a mess, it is best to call
$p->rebuild_tree to rebuild the pages tree into something efficient.
=cut
sub add_page
{
my ($self, $page, $pnum) = @_;
my ($top) = $self->get_top;
my ($ppage, $ppages, $pindex, $ppnum);
$pnum = -1 unless (defined $pnum && $pnum <= $top->{'Count'}->val);
if ($pnum == -1)
{ $ppage = $top->find_page($top->{'Count'}->val - 1); }
else
{
$pnum = $top->{'Count'}->val + $pnum + 1 if ($pnum < 0);
$ppage = $top->find_page($pnum);
}
if (defined $ppage->{'Parent'})
{ $ppages = $ppage->{'Parent'}->realise; }
else
{ $ppages = $self; }
$ppnum = scalar $ppages->{'Kids'}->realise->elementsof;
if ($pnum == -1)
{ $pindex = -1; }
else
{
for ($pindex = 0; $pindex < $ppnum; $pindex++)
{ last if ($ppages->{'Kids'}{' val'}[$pindex] eq $ppage); }
$pindex = -1 if ($pindex == $ppnum);
}
$ppages->add_page_recurse($page->realise, $pindex);
for ($ppages = $page->{'Parent'}; defined $ppages->{'Parent'}; $ppages = $ppages->{'Parent'}->realise)
{ $ppages->out_obj->{'Count'}->realise->{'val'}++; }
$ppages->out_obj->{'Count'}->realise->{'val'}++;
$page;
}
sub add_page_recurse
{
my ($self, $page, $index) = @_;
my ($newpages, $ppages, $pindex, $ppnum);
if (scalar $self->{'Kids'}->elementsof >= 8 && $self->{'Parent'} && $index < 1)
{
$ppages = $self->{'Parent'}->realise;
$newpages = $self->new($self->{' outto'}, $ppages);
if ($ppages)
{
$ppnum = scalar $ppages->{'Kids'}->realise->elementsof;
for ($pindex = 0; $pindex < $ppnum; $pindex++)
{ last if ($ppages->{'Kids'}{' val'}[$pindex] eq $self); }
$pindex = -1 if ($pindex == $ppnum);
$ppages->add_page_recurse($newpages, $pindex);
}
}
else
{ $newpages = $self->out_obj; }
if ($index < 0)
{ push (@{$newpages->{'Kids'}->realise->{' val'}}, $page); }
else
{ splice (@{$newpages->{'Kids'}{' val'}}, $index, 0, $page); }
$page->{'Parent'} = $newpages;
weaken $page->{'Parent'};
}
=head2 $root_pages = $p->rebuild_tree([@pglist])
Rebuilds the pages tree to make a nice balanced tree that conforms to Adobe
recommendations. If passed a pglist then the tree is built for that list of
pages. No check is made of whether the pglist contains pages.
Returns the top of the tree for insertion in the root object.
=cut
sub rebuild_tree
{
my ($self, @pglist) = @_;
}
=head2 @pglist = $p->get_pages
Returns a list of page objects in the document in page order
=cut
sub get_pages
{
my ($self) = @_;
return $self->get_top->get_kids;
}
# only call this on the top level or anything you want pages below
sub get_kids
{
my ($self) = @_;
my @pglist;
foreach my $pgref ($self->{'Kids'}->elementsof)
{
$pgref->realise;
if ($pgref->{'Type'}->val =~ m/^Pages$/oi)
{ push (@pglist, $pgref->get_kids()); }
else
{ push (@pglist, $pgref); }
}
@pglist;
}
=head2 $p->find_prop($key)
Searches up through the inheritance tree to find a property.
=cut
sub find_prop
{
my ($self, $prop) = @_;
if (defined $self->{$prop})
{
if (ref $self->{$prop} && $self->{$prop}->isa("PDF::API2::Basic::PDF::Objind"))
{ return $self->{$prop}->realise; }
else
{ return $self->{$prop}; }
} elsif (defined $self->{'Parent'})
{ return $self->{'Parent'}->find_prop($prop); }
return;
}
=head2 $p->add_font($pdf, $font)
Creates or edits the resource dictionary at this level in the hierarchy. If
the font is already supported even through the hierarchy, then it is not added.
=cut
sub add_font
{
my ($self, $font, $pdf) = @_;
my ($name) = $font->{'Name'}->val;
my ($dict) = $self->find_prop('Resources');
my ($rdict);
return $self if ($dict ne "" && defined $dict->{'Font'} && defined $dict->{'Font'}{$name});
unless (defined $self->{'Resources'})
{
$dict = $dict ne "" ? $dict->copy($pdf) : PDFDict();
$self->{'Resources'} = $dict;
}
else
{ $dict = $self->{'Resources'}; }
$dict->{'Font'} = PDFDict() unless defined $self->{'Resources'}{'Font'};
$rdict = $dict->{'Font'}->val;
$rdict->{$name} = $font unless ($rdict->{$name});
if (ref $dict ne 'HASH' && $dict->is_obj($pdf))
{ $pdf->out_obj($dict); }
if (ref $rdict ne 'HASH' && $rdict->is_obj($pdf))
{ $pdf->out_obj($rdict); }
$self;
}
=head2 $p->bbox($xmin, $ymin, $xmax, $ymax, [$param])
Specifies the bounding box for this and all child pages. If the values are
identical to those inherited then no change is made. $param specifies the attribute
name so that other 'bounding box'es can be set with this method.
=cut
sub bbox
{
my ($self, @bbox) = @_;
my ($str) = $bbox[4] || 'MediaBox';
my ($inh) = $self->find_prop($str);
my ($test, $i);
if ($inh ne "")
{
$test = 1; $i = 0;
foreach my $e ($inh->elementsof)
{ $test &= $e->val == $bbox[$i++]; }
return $self if $test && $i == 4;
}
$inh = PDF::API2::Basic::PDF::Array->new;
foreach my $e (@bbox[0..3])
{ $inh->add_elements(PDFNum($e)); }
$self->{$str} = $inh;
$self;
}
=head2 $p->proc_set(@entries)
Ensures that the current resource contains all the entries in the proc_sets
listed. If necessary it creates a local resource dictionary to achieve this.
=cut
sub proc_set
{
my ($self, @entries) = @_;
my (@temp) = @entries;
my $dict;
$dict = $self->find_prop('Resource');
if ($dict ne "" && defined $dict->{'ProcSet'})
{
foreach my $e ($dict->{'ProcSet'}->elementsof)
{ @temp = grep($_ ne $e, @temp); }
return $self if (scalar @temp == 0);
@entries = @temp if defined $self->{'Resources'};
}
unless (defined $self->{'Resources'})
{ $self->{'Resources'} = $dict ne "" ? $dict->copy : PDFDict(); }
$self->{'Resources'}{'ProcSet'} = PDFArray() unless defined $self->{'ProcSet'};
foreach my $e (@entries)
{ $self->{'Resources'}{'ProcSet'}->add_elements(PDFName($e)); }
$self;
}
sub empty
{
my ($self) = @_;
my $parent = $self->{'Parent'};
$self->SUPER::empty;
if (defined $parent) {
$self->{'Parent'} = $parent;
weaken $self->{'Parent'};
}
$self;
}
sub dont_copy
{ return $inst{$_[1]} || $_[0]->SUPER::dont_copy($_[1]); }
=head2 $p->get_top
Returns the top of the pages tree
=cut
sub get_top
{
my ($self) = @_;
my ($p);
for ($p = $self; defined $p->{'Parent'}; $p = $p->{'Parent'})
{ }
$p->realise;
}
1;

View File

@@ -0,0 +1,221 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::String;
use base 'PDF::API2::Basic::PDF::Objind';
use strict;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::String - PDF String type objects and superclass
for simple objects that are basically stringlike (Number, Name, etc.)
=head1 METHODS
=cut
our %trans = (
'n' => "\n",
'r' => "\r",
't' => "\t",
'b' => "\b",
'f' => "\f",
"\\" => "\\",
'(' => '(',
')' => ')',
);
our %out_trans = (
"\n" => 'n',
"\r" => 'r',
"\t" => 't',
"\b" => 'b',
"\f" => 'f',
"\\" => "\\",
'(' => '(',
')' => ')',
);
=head2 PDF::API2::Basic::PDF::String->from_pdf($string)
Creates a new string object (not a full object yet) from a given
string. The string is parsed according to input criteria with
escaping working.
=cut
sub from_pdf {
my ($class, $str) = @_;
my $self = {};
bless $self, $class;
$self->{'val'} = $self->convert($str);
$self->{' realised'} = 1;
return $self;
}
=head2 PDF::API2::Basic::PDF::String->new($string)
Creates a new string object (not a full object yet) from a given
string. The string is parsed according to input criteria with
escaping working.
=cut
sub new {
my ($class, $str) = @_;
my $self = {};
bless $self, $class;
$self->{'val'} = $str;
$self->{' realised'} = 1;
return $self;
}
=head2 $s->convert($str)
Returns $str converted as per criteria for input from PDF file
=cut
sub convert {
my ($self, $input) = @_;
my $output = '';
# Hexadecimal Strings (PDF 1.7 section 7.3.4.3)
if ($input =~ m|^\s*\<|o) {
$self->{' ishex'} = 1;
$output = $input;
# Remove any extraneous characters to simplify processing
$output =~ s/[^0-9a-f]+//gio;
$output = "<$output>";
# Convert each sequence of two hexadecimal characters into a byte
1 while $output =~ s/\<([0-9a-f]{2})/chr(hex($1)) . '<'/oige;
# If a single hexadecimal character remains, append 0 and
# convert it into a byte.
$output =~ s/\<([0-9a-f])\>/chr(hex($1 . '0'))/oige;
# Remove surrounding angle brackets
$output =~ s/\<\>//og;
}
# Literal Strings (PDF 1.7 section 7.3.4.2)
else {
# Remove surrounding parentheses
$input =~ s/^\s*\((.*)\)\s*$/$1/os;
my $cr = '(?:\015\012|\015|\012)';
my $prev_input;
while ($input) {
if (defined $prev_input and $input eq $prev_input) {
die "Infinite loop while parsing literal string";
}
$prev_input = $input;
# Convert bachslash followed by up to three octal digits
# into that binary byte
if ($input =~ /^\\([0-7]{1,3})(.*)/os) {
$output .= chr(oct($1));
$input = $2;
}
# Convert backslash followed by an escaped character into that
# character
elsif ($input =~ /^\\([nrtbf\\\(\)])(.*)/osi) {
$output .= $trans{$1};
$input = $2;
}
# Ignore backslash followed by an end-of-line marker
elsif ($input =~ /^\\$cr(.*)/os) {
$input = $1;
}
# Convert an unescaped end-of-line marker to a line-feed
elsif ($input =~ /^\015\012?(.*)/os) {
$output .= "\012";
$input = $1;
}
# Check to see if there are any other special sequences
elsif ($input =~ /^(.*?)((?:\\(?:[nrtbf\\\(\)0-7]|$cr)|\015\012?).*)/os) {
$output .= $1;
$input = $2;
}
else {
$output .= $input;
$input = undef;
}
}
}
return $output;
}
=head2 $s->val
Returns the value of this string (the string itself).
=cut
sub val {
return $_[0]->{'val'};
}
=head2 $->as_pdf
Returns the string formatted for output as PDF for PDF File object $pdf.
=cut
sub as_pdf {
my ($self) = @_;
my $str = $self->{'val'};
if ($self->{' isutf'}) {
$str = join('', map { sprintf('%04X' , $_) } unpack('U*', $str) );
return "<FEFF$str>";
}
elsif ($self->{' ishex'}) { # imported as hex ?
$str = unpack('H*', $str);
return "<$str>";
}
else {
if ($str =~ m/[^\n\r\t\b\f\040-\176\200-\377]/oi) {
$str =~ s/(.)/sprintf('%02X', ord($1))/oge;
return "<$str>";
}
else {
$str =~ s/([\n\r\t\b\f\\()])/\\$out_trans{$1}/ogi;
return "($str)";
}
}
}
=head2 $s->outobjdeep
Outputs the string in PDF format, complete with necessary conversions
=cut
sub outobjdeep {
my ($self, $fh, $pdf, %opts) = @_;
$fh->print($self->as_pdf($pdf));
}
1;

View File

@@ -0,0 +1,140 @@
#=======================================================================
#
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
#
# Copyright Martin Hosken <Martin_Hosken@sil.org>
#
# No warranty or expression of effectiveness, least of all regarding
# anyone's safety, is implied in this software or documentation.
#
# This specific module is licensed under the Perl Artistic License.
#
#=======================================================================
package PDF::API2::Basic::PDF::Utils;
use strict;
our $VERSION = '2.033'; # VERSION
=head1 NAME
PDF::API2::Basic::PDF::Utils - Utility functions for PDF library
=head1 DESCRIPTION
A set of utility functions to save the fingers of the PDF library users!
=head1 FUNCTIONS
=cut
use PDF::API2::Basic::PDF::Array;
use PDF::API2::Basic::PDF::Bool;
use PDF::API2::Basic::PDF::Dict;
use PDF::API2::Basic::PDF::Name;
use PDF::API2::Basic::PDF::Null;
use PDF::API2::Basic::PDF::Number;
use PDF::API2::Basic::PDF::String;
use PDF::API2::Basic::PDF::Literal;
use Exporter;
use vars qw(@EXPORT @ISA);
@ISA = qw(Exporter);
@EXPORT = qw(PDFBool PDFArray PDFDict PDFName PDFNull
PDFNum PDFStr PDFStrHex PDFUtf);
=head2 PDFBool
Creates a Bool via PDF::API2::Basic::PDF::Bool->new
=cut
sub PDFBool {
return PDF::API2::Basic::PDF::Bool->new(@_);
}
=head2 PDFArray
Creates an array via PDF::API2::Basic::PDF::Array->new
=cut
sub PDFArray {
return PDF::API2::Basic::PDF::Array->new(@_);
}
=head2 PDFDict
Creates a dict via PDF::API2::Basic::PDF::Dict->new
=cut
sub PDFDict {
return PDF::API2::Basic::PDF::Dict->new(@_);
}
=head2 PDFName
Creates a name via PDF::API2::Basic::PDF::Name->new
=cut
sub PDFName {
return PDF::API2::Basic::PDF::Name->new(@_);
}
=head2 PDFNull
Creates a null via PDF::API2::Basic::PDF::Null->new
=cut
sub PDFNull {
return PDF::API2::Basic::PDF::Null->new(@_);
}
=head2 PDFNum
Creates a number via PDF::API2::Basic::PDF::Number->new
=cut
sub PDFNum {
return PDF::API2::Basic::PDF::Number->new(@_);
}
=head2 PDFStr
Creates a string via PDF::API2::Basic::PDF::String->new
=cut
sub PDFStr {
return PDF::API2::Basic::PDF::String->new(@_);
}
=head2 PDFStrHex
Creates a hex-string via PDF::API2::Basic::PDF::String->new
=cut
sub PDFStrHex {
my $string = PDF::API2::Basic::PDF::String->new(@_);
$string->{' ishex'} = 1;
return $string;
}
=head2 PDFUtf
Creates a utf8-string via PDF::API2::Basic::PDF::String->new
=cut
sub PDFUtf {
my $string = PDF::API2::Basic::PDF::String->new(@_);
$string->{' isutf'} = 1;
return $string;
}
1;