| dotReader documentation | Contained in the dotReader distribution. |
dtRdr::Book - base and factory class for books
dtRdr::Book->import(%arguments);
Called by 'use dtRdr::Book (%arguments)'. Only applicable to plugins.
A hash reference which is passed to register_plugin().
use dtRdr::Book (register => {});
use dtRdr::Book (register => {foo => "bar"});
Registers your plugin with dtRdr::Plugins::Book.
dtRdr::Book->register_plugin(%args);
Create a new object and load the uri.
dtRdr::Book->new_from_uri($uri);
my $class = dtRdr::Book->identify_by_uri($uri);
These methods may be overridden by subclasses.
$book = dtRdr::Book->new();
Subclasses need to implement these.
$book->load_uri($uri);
$book->identify_uri();
my $bool = $book->member_exists($filepath);
Gets the string for the member at path $filepath.
my $string = $book->get_member_string($filepath);
Returns the HTML and associated image and object data for a portion of the book.
my $html = $book->get_content($toc)
The $toc object is a ('dtRdr::TOC') node representing the content
location.
my $html = $book->get_content_by_id($id);
Unprocessed content for a given node.
$book->get_raw_content($toc);
Copyable content for a given node.
$book->get_copy_content($toc);
Gets all of the rendered xml content for a $toc node.
my $xml = $book->get_trimmed_content($toc);
A class method that says whether the book needs special treatment.
Create and store a checksum for the book.
$book->mk_fingerprint($stringref);
Can only happen once.
$book->set_id($id);
$book->add_note($note); $book->add_bookmark($bookmark); $book->add_highlight($highlight);
Notify callbacks and IO subsystem that the annotation has changed.
$note = $book->change_note($note); $bookmark = $book->change_bookmark($bookmark); $highlight = $book->change_highlight($highlight);
Alternatively, for internal usage, will replace an annotation object with a new one bearing the same id. If the object returned is not the same reference that you passed, you should use the one which is returned.
$book->delete_note($note); $book->delete_bookmark($bookmark); $book->delete_highlight($highlight);
my $note = $book->find_note($anno_id); my $bookmark = $book->find_bookmark($anno_id); my $highlight = $book->find_highlight($anno_id);
Get all highlights for a given node, including ancestors, those that start in older siblings, etc.
my @notes = $book->node_notes($node); my @bookmarks = $book->node_bookmarks($node); my @highlights = $book->node_highlights($node);
Highlights which structurally belong to the given TOC node.
my @notes = $book->local_notes($node); my @bookmarks = $book->local_bookmarks($node); my @highlights = $book->local_highlights($node);
Look for notes in the same thread and build them into a tree. Returns a dtRdr::NoteThread object, unless the note is a single, in which case it returns only the note object.
my $thread = $book->note_thread($note);
$note may be a note object or note id.
A really stupid name? Maybe just in the wrong place.
my @pile = $book->find_note_references($note);
$book->drop_selections;
Globalize a Node Position
$self->_GP_to_NP($node, $pos);
$self->_NP_to_GP($node, $pos);
$book->localize_annotation($anno, $node);
Get the annotation IO object.
Set the annotation IO object.
This sidesteps the issue of having distinct add_foo(), add_foo_nowrite() methods. If there is a serializer (see dtRdr::Annotation::IO) available, we call $action on it with $object and %args.
$book->do_serialize($action, $object, %args);
Disable the annotation I/O system while running an action. Designed for use by the I/O system.
$book->io_action(sub {...});
Insert the note, bookmark, and highlight data for the given node.
$content = $self->insert_nbh($node, $content);
This also creates the cache_chars data needed for locate_string()
operations.
TODO a better name (like rapidfire_xml_hatchet or something)
ALSO TODO currently, books need to call this from their get_content() or else they have no specialness. Books really must call this though, since e.g. mozilla requires that the images get rewritten as base64 encoded.
Finds $string in $node and returns a Selection object (possibly for a different node) with RENDERED POSITIONS.
my $range = $book->locate_string($node, $string, $lwing, $rwing);
my @matches = _context_match(\$string, $left, $search, $right);
Returns a list of match pairs: @matches = ([$start, $end], ...);
Returns a Range with the appropriate TOC node and start/end positions.
my $range = $book->reduce_word_scope($node, $start, $end);
The characters (for use in annotation placement) are cached by insert_nbh(), but can also be created and managed with these methods.
The cache is currently permanent WRT the life of the object. This is, of course, a slow memory leak. If you're short on ram, please send code and/or time.
my $bool = $book->has_cached_node_characters($node);
Saves the characters in the cache, and will eventually implement cache management.
$book->cache_node_characters($node, $chars);
Get the cache characters for a given TOC node.
my $chars = $book->get_cache_chars($node);
Get the characters for $node. Will check the cache and/or create them.
my $chars = $book->get_node_characters($node);
my $chars = $book->create_node_characters($node);
Every book has a table of contents.
my $root = $book->get_toc;
See TOC for tree-related methods and below for book-related methods (book related methods can transcend the tree structure, allowing us to support less-than-elegant book formats.)
Find the TOC node for $id.
my $toc = $book->find_toc($id);
my @nodes = $book->descendant_nodes($node);
my @nodes = $book->ancestor_nodes($node);
my @visible = $book->visible_nodes;
Gets the xml callbacks for xml rewrite processing.
my %dispatch = $book->xml_callbacks;
This %dispatch table will contain start and end keys, below which
will be keys for individual xml elements (aka 'a', 'img', etc.)
For the basic case of adding or overriding element handlers, plugins should define the augment_xml_callbacks() method rather than overriding this method. No mechanism is provided or planned for chained handlers -- in that case you should override, then curry as needed.
Callbacks are fired during the parse() method of dtRdr::BookUtil::AnnoInsert. The parameters are as follows.
$subref->(
$book,
node => $node,
parser => $parser,
attributes => \%atts,
before => \$before,
during => \$during,
after => \$after
);
The $before, $during, and $after references are strings which the plugin should modify as needed.
Note that this is very similar to the XML start/end handler. The only thing missing is the element name (you know that already.)
The book object (aka $self.)
The Expat object.
The %atts hash which was fed to the tag handler by the parser (only in start tag.)
A reference to the hoppers string intended to be placed before the element.
A reference to the $parser->recognized_string value.
A referece to the hoppers string intended to be placed after the element.
Note (again) that before, during, and after are *references* to strings. This is speed-critical, or else the API would be more user-friendly.
This key is replaced with the value of $book->XML_CONTENT_NODE (or dropped if there is no such method.) This builtin callback provides the copy_ok handling (and maybe more later.)
Just a constant, but may be overridden.
my %dispatch = $self->_standard_xml_callbacks;
dtRdr::Book::Zombie - Books that walk around mindlessly
Though they don't eat flesh, these are the undead books. They have no content or other book-like capabilities besides an id and possibly a uri.
They may be resurrect()ed, though that magic isn't done yet. Similarly, calling $book->kill should turn it into a zombie.
dtRdr::Book::Zombie->new(id => $id);
Eric Wilhelm <ewilhelm at cpan dot org>
Copyright (C) 2006-2007 Eric L. Wilhelm and OSoft, All Rights Reserved.
Absolutely, positively NO WARRANTY, neither express or implied, is offered with this software. You use this software at your own risk. In case of loss, no person or entity owes you anything whatsoever. You have been warned.
The dotReader(TM) is OSI Certified Open Source Software licensed under the GNU General Public License (GPL) Version 2, June 1991. Non-encrypted and encrypted packages are usable in connection with the dotReader(TM). The ability to create, edit, or otherwise modify content of such encrypted packages is self-contained within the packages, and NOT provided by the dotReader(TM), and is addressed in a separate commercial license.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
| dotReader documentation | Contained in the dotReader distribution. |
package dtRdr::Book; $VERSION = eval{require version}?version::qv($_):$_ for(0.10.1); use warnings; use strict; use Carp; use XML::Parser::Expat; use Digest::MD5; use URI; use dtRdr::Plugins::Book; use dtRdr::TOC; use dtRdr::Metadata::Book; use dtRdr::String::Splicer; use dtRdr::Logger; # the callback object is magic class data use dtRdr::Callbacks::Book; use dtRdr::Selection; use dtRdr::NoteThread; use dtRdr::Traits::Class qw( NOT_IMPLEMENTED ); use Class::Accessor::Classy; ro qw( notes bookmarks highlights selections toc meta ); ri qw( id title ); rw 'anno_io'; rs fingerprint => \ (my $set_fingerprint); no Class::Accessor::Classy; use Method::Alias qw( has_cached_NC has_cached_node_characters get_cached_NC get_cache_chars get_NC get_node_characters create_NC create_node_characters cache_NC cache_node_characters );
sub import { my $self = shift; my @args = @_; # only do something if requested @args or return(); (@args %2) and croak("odd number of elements in argument hash"); my %args = @args; $args{register} or return(); # XXX until we learn new tricks at least $args{'register'}{'class'} ||= caller; # just like: use dtRdr::Book ( register => {class => __PACKAGE__} ); $self->register_plugin(%{$args{register}}); } # end subroutine import definition ########################################################################
sub register_plugin { my $self = shift; (@_ %2) and croak("odd number of elements in argument hash"); my %args = @_; $args{'class'} ||= caller; dtRdr::Plugins::Book->add_class(%args); } # end subroutine register_plugin definition ########################################################################
# TODO: # new_by_uri - creates object but doesn't load # identify_by_foo - $_->identify_foo for (@plugins)
sub new_from_uri { my $self = shift; my ($uri) = @_; my ($class, $cache) = $self->identify_by_uri($uri); if($class) { my $book = $class->new(); # $cache should always have only one key my ($cache_key, @o) = keys(%$cache); @o and die "'$class' overfilled the cache in identify_uri()"; # see if it has a cache-utilizing constructor method my $method = $cache_key ? "load_from_$cache_key" : 'load_uri'; $method = 'load_uri' unless($book->can($method)); $book->$method( ($cache_key ? $cache->{$cache_key} : $uri)) or die "factory call to $book->$method failed"; # XXX and then try more plugins? return($book); } die "could not find a suitable plugin for '$uri'"; } # end subroutine new_from_uri definition ########################################################################
sub identify_by_uri { my $self = shift; my ($uri) = @_; # FIXME make cache a passed-in reference # (so we can call this from elsewhere and not deal with that) my @plugins = dtRdr::Plugins::Book->get_classes(); @plugins or die("no book plugins defined"); my %cache; foreach my $class (@plugins) { RL('#plugin')->debug("Loading Plugin $class"); if(my $ref = $class->can('identify_uri')) { if($ref eq __PACKAGE__->can('identify_uri')) { RL('#plugin')->warn("$class unfinished"); # XXX de-register? next; } } else { # XXX de-register? RL('#plugin')->warn("$class is incompetent"); next; } # warn "ask $class"; # optimizes repassing objects if the identify_ method in one plugin # creates one -- also: skip those that don't work with that type of # object? my ($res, $res_c) = $class->identify_uri( $uri, %cache ? {%cache} : ()); $res and return($class, ($res_c ? ($res_c) : {})); L->debug("$class declined $uri"); %cache = ($res_c ? (%$res_c) : ()); # no return -> drop existing cache } return(); } # end subroutine identify_by_uri definition ########################################################################
sub new { my $package = shift; my $class = ref($package) || $package; my $self = {}; my %defaults = ( cache_chars => {}, highlights => {}, bookmarks => {}, notes => {}, selections => {}, ); foreach my $k (keys(%defaults)) { $self->{$k} = $defaults{$k}; } bless($self, $class); $self->{meta} ||= $self->can('METADATA_CLASS') ? $self->METADATA_CLASS->new : dtRdr::Metadata::Book->new; return($self); } # end subroutine new definition ########################################################################
sub load_uri { $_[0]->NOT_IMPLEMENTED(@_[1..$#_]); }
sub identify_uri { $_[0]->NOT_IMPLEMENTED(@_[1..$#_]); }
sub member_exists { $_[0]->NOT_IMPLEMENTED(@_[1..$#_]); }
sub get_member_string { $_[0]->NOT_IMPLEMENTED(@_[1..$#_]); }
sub get_content { my $self = shift; $self->NOT_IMPLEMENTED(@_); }
sub get_content_by_id { my $self = shift; $self->NOT_IMPLEMENTED(@_); }
sub get_raw_content { my $self = shift; $self->NOT_IMPLEMENTED(@_); }
sub get_copy_content { my $self = shift; $self->NOT_IMPLEMENTED(@_); }
sub get_trimmed_content { my $self = shift; $self->NOT_IMPLEMENTED(@_); }
sub is_nesty { my $self = shift; $self->NOT_IMPLEMENTED(@_); }
sub mk_fingerprint { my $self = shift; my ($data) = @_; my $digest = Digest::MD5->new; # TODO $data could be a filehandle, filename, etc if(ref($data) eq 'SCALAR') { $digest->add($$data); } else { croak('must have a scalar reference'); } $self->$set_fingerprint( $digest->hexdigest ); } # end subroutine mk_fingerprint definition ########################################################################
sub set_id { my $self = shift; my ($id) = @_; defined($self->id) and croak("cannot change book id"); $self->SUPER::set_id($id); } # end subroutine set_id definition ########################################################################
######################################################################## # a pile of nearly identical methods for each annotation type ######################################################################## foreach my $foo (qw(note bookmark highlight selection)) { # pluralize my $foos = $foo . 's'; # type map my $foo_type = { bookmark => 'dtRdr::Bookmark', highlight => 'dtRdr::Highlight', note => 'dtRdr::Note', selection => 'dtRdr::AnnoSelection', }->{$foo}; $foo_type or die "ack $foo"; my $persist = { 'bookmark' => 1, 'highlight' => 1, 'note' => 1, }->{$foo}; # and we'll need a getter my $get_foos = 'get_' . $foos; # methods we'll create and maybe call my $add_foo = 'add_' . $foo; my $change_foo = 'change_' . $foo; my $delete_foo = 'delete_' . $foo; my $find_foo = 'find_' . $foo; my $node_foos = 'node_' . $foos; my $local_foos = 'local_' . $foos; my $related_foos = 'related_' . $foos;
my $add_foo_sub = sub { my $self = shift; my ($anno) = @_; eval { $anno->isa($foo_type) } or croak("Not a $foo"); my $node_id = $anno->node->id; # memorize it my $data = $self->$get_foos; $data->{$node_id} ||= {}; $data->{$node_id}{$anno->id} = $anno; #WARN "added $anno"; # hope this isn't a speed issue $self->get_callbacks->annotation_created($anno); # persist it $self->do_serialize('insert', $anno) if($persist); return($anno); }; # end subroutine $add_foo_sub definition ########################################################################
my $change_foo_sub = sub { my $self = shift; my ($anno) = @_; my $node_id = $anno->node->id; my $anno_id = $anno->id; my $data = $self->$get_foos; $data->{$node_id} or croak("cannot change that annotation"); my $had = $data->{$node_id}{$anno_id}; unless($had == $anno) { # NOT: $data->{$node_id}{$anno_id} = $anno; # oops, what about those who had a handle on it? # XXX THIS SCARES ME, # but it is probably (at least roughly) the way to go %{$had} = %{$anno}; } $self->get_callbacks->annotation_changed($had); $self->do_serialize('update', $had) if($persist); return($had); }; # end subroutine $change_foo_sub definition ########################################################################
my $delete_foo_sub = sub { my $self = shift; my ($anno) = @_; eval { $anno->isa($foo_type) } or croak("Not a $foo"); # all data my $data = $self->$get_foos; # node data my $node_data = $data->{$anno->node->id}; $node_data or return(); delete($node_data->{$anno->id}); $self->get_callbacks->annotation_deleted($anno); # persist it $self->do_serialize('delete', $anno) if($persist); return($anno); }; # end subroutine $delete_foo_sub definition ########################################################################
my $find_foo_sub = sub { my $self = shift; my ($anno_id) = @_; my $data = $self->$get_foos; foreach my $key (keys %$data) { if(my $anno = $data->{$key}{$anno_id}) { return($anno); } } return(); }; # end subroutine $find_foo_sub definition ########################################################################
my $node_foos_sub = sub { my $self = shift; my ($node) = @_; $node or croak("must have a node"); my @local = $self->$local_foos($node); # check the ancestor/descendant tree and pre-calculate local RP's my @others = $self->$related_foos($node); # @others and warn "my others is @others"; # NOTE not worth sorting here, since we have to build a @todo list # that is just a group of positions, -- i.e. overlapping highlights # don't sort start-before-start return(@local, @others); }; # end subroutine $node_foos_sub definition ########################################################################
my $local_foos_sub = sub { my $self = shift; my ($node) = @_; my $foo_ref = $self->$get_foos; #WARN($foo_ref ? (%{$foo_ref}) : 'none', " ($foo) for node id:",$node->id); return(values(%{ $foo_ref->{$node->id} || {} })); }; # end subroutine $local_foos_sub definition ########################################################################
my $related_foos_sub = sub { my $self = shift; my ($node) = @_; my @related = map({$self->$local_foos($_)} $self->descendant_nodes($node), $self->ancestor_nodes($node), ); # @related and warn "\nrelated: @related"; # @related and warn "first is ", $related[0]->node->id; return(map({$self->localize_annotation($_, $node)} @related)); }; # end subroutine $related_foos_sub definition ######################################################################## my $package = __PACKAGE__; { # now just install them no strict 'refs'; *{$package . '::' . $add_foo} = $add_foo_sub; *{$package . '::' . $change_foo} = $change_foo_sub; *{$package . '::' . $delete_foo} = $delete_foo_sub; *{$package . '::' . $find_foo} = $find_foo_sub; *{$package . '::' . $node_foos} = $node_foos_sub; *{$package . '::' . $local_foos} = $local_foos_sub; *{$package . '::' . $related_foos} = $related_foos_sub; } } # end pile of nearly identical methods ######################################################################## ########################################################################
sub note_thread { my $self = shift; my ($note) = @_; my @pile = $self->find_note_references($note); (1 == scalar(@pile)) and return($pile[0]); my ($thread, @other) = dtRdr::NoteThread->create(@pile); @other and die "threading broke"; # should return only one thread return($thread); } # end subroutine note_thread definition ########################################################################
sub find_note_references { my $self = shift; my ($note) = @_; # TODO let that be an id my $root_id = $note->id; if(my @r = $note->references) { $root_id = $r[-1]; } my @pile = $self->local_notes($note->node); my @thread = grep({ my @r; ((@r = $_->references) ? $r[-1] : $_->id) eq $root_id; } @pile); return(@thread); } # end subroutine find_note_references definition ########################################################################
sub drop_selections { my $self = shift; $self->{selections} = {}; } # end subroutine drop_selections definition ########################################################################
sub _GP_to_NP { my $self = shift; my ($node, $gp) = @_; my $np = $gp - $node->word_start; return($np); } # end subroutine _GP_to_NP definition ########################################################################
sub _NP_to_GP { my $self = shift; my ($node, $np) = @_; my $gp = $np + $node->word_start; return($gp); } # end subroutine _NP_to_GP definition ########################################################################
sub localize_annotation { my $self = shift; my ($anno, $node) = @_; my $dbg = 0; $dbg and warn "try to localize ", $anno->node->id; # TODO make this a proper method # also, what to do for out-of-range? my $localize = sub { my ($snode, $dnode, $s_rp) = @_; my $s_np = $self->_RP_to_NP($snode, $s_rp); my $gp = $self->_NP_to_GP($snode, $s_np); my $d_np = $self->_GP_to_NP($dnode, $gp); # out-of-range? my $d_rp = $self->_NP_to_RP($dnode, $d_np); $dbg and warn join("\n ", 'details', "source_rp: $s_rp", "source_np: $s_np", "gp: $gp", "dest_np: $d_np", "dest_rp: $d_rp", ), "\n "; return($d_rp); }; if($anno->isa('dtRdr::Range')) { my ($s_rp, $e_rp) = map({ my $method = 'get_' . $_ . '_pos'; # play nice with Boundless my $sloc = $anno->$method; defined($sloc) or die "no offset"; $localize->($anno->node, $node, $sloc); } qw(start end)); my $nlength = $self->_NP_to_RP($node, $node->word_end - $node->word_start); if(($s_rp < 0) and ($e_rp > $nlength)) { # it spans it $s_rp = 0; $e_rp = $nlength; } else { # does it even touch the $node? return if( (($s_rp < 0) and ($e_rp < 0)) or (($s_rp > $nlength) and ($e_rp > $nlength)) ); } 0 and warn "ok -- got this far"; # otherwise, adjust min/max values ($s_rp < 0) and ($s_rp = 0); ($e_rp > $nlength) and ($e_rp = $nlength); my $local = $anno->renode($node, range => [$s_rp, $e_rp]); return($local); } else { die "dunno how to do that yet jim"; } } # end subroutine localize_annotation definition ########################################################################
sub do_serialize { my $self = shift; my ($action, $object, %args) = @_; # can't serialize without one my $io = $self->anno_io or return; $io->can($action) or croak("incompetent? (cannot '$action') IO object: $io"); $io->$action($object, %args); } # end subroutine do_serialize definition ########################################################################
sub io_action { my $self = shift; my ($subref) = @_; local $self->{anno_io}; $subref->(); } # end subroutine io_action definition ########################################################################
sub insert_nbh { my $self = shift; my ($node, $content) = @_; eval {$node->isa('dtRdr::TOC')} or croak('not a TOC object (usage: $book->insert_nbh($node, $str); )'); RL('#speed')->info('running insert_nbh for ', $node->id); unless(@_ >= 2) { # get the content croak("must have content"); die "this part isn't done"; # TODO requires get_content() refactor # maybe something like _get_basic_content() ? # or just say YAGNI, bah, etc. } # notes are special my @notes = dtRdr::NoteThread->create($self->node_notes($node)); foreach my $note (@notes) { # each is actually a tree $note = $note->note unless($note->has_children); } # The node_foos() calls give offsets in local coordinates. # The todo list is a list of arrays -- we'll shift it down to nothing. # First value of each is the position, second is the object. my @todo = ( map({ [ $_->start_pos, $_ ], [ $_->end_pos, $_ ], } $self->node_highlights($node), @notes, $self->node_bookmarks($node), $self->node_selections($node), ), ); #WARN "todo is ", join("|", map({$_->[1]} @todo)); # we have to sort it here to get overlapping highlights to work @todo = sort({$a->[0] <=> $b->[0]} @todo); RL('#speed')->info(scalar(@todo), ' todo items ... starting insert parse'); use dtRdr::BookUtil::AnnoInsert; my $parser = dtRdr::BookUtil::AnnoInsert->new($self, todo => \@todo, node => $node, xml_callbacks => {$self->xml_callbacks}, ); $parser->parse($content); my $output = $parser->done; DBG_DUMP('INSERT', 'thecontent.xml', sub {$output}); RL('#speed')->info('insert_nbh done'); return($output); } # end subroutine insert_nbh definition ########################################################################
sub locate_string { my $self = shift; my ($node, $str, $lstr, $rstr) = @_; ###################################################################### my $cache = $self->get_cache_chars($node); my ($s, $l, $r) = ($str, $lstr, $rstr); ###################################################################### # NOTE: # xml allows < > & " &apos # all twig passes is < and & -- all else gets encoded # so, the assumption is that there is no " etc in the node_chars s/&/&/gs for($s,$l,$r); s/</</gs for($s,$l,$r); ###################################################################### # a bit of fixup $_ =~ s/\s+/ /gs for($s, $l, $r); # singularize whitespace $_ =~ s/^ $//s for($l, $r); # null wings are an issue $s =~ s/^ // if($l =~ m/ $/); # migrate leading space # XXX should we be able to allow # trailing space on the selection # without a right-context? (which # only happens at end-of-node) $s =~ s/ $// if($r =~ m/^(?: |$)/); # migrate trailing space # no trailing/leading space in selection # XXX this is a forced migration (as opposed to the context-wins one above) if($s =~ s/ $//) { $r = ' ' . $r if($r ne ''); } if($s =~ s/^ //) { $l = $l . ' ' if($l ne ''); } DBG_DUMP('CACHE', 'cache', sub {$cache}); # the search: # my $pos = index($cache, "$l$s$r"); # # TODO check that this was unique? # DBG_DUMP('CACHE', 'dump', sub {"$l$s$r\n$cache\n\n# vim\:nowrap"}); # unless($pos >= 0) { # XXX this if() won't fit the m// approach # # XXX some problem with getting utf8 in the selection when we had # # given it an html-escaped character # DBG_DUMP('SEARCH', 'bah', # sub {"search failed on\r$l$s$r\r$cache\r\r# vim:nowrap"} # ); # L->error( # "could not find >>>$l$s$r<<<" . # (0 ? " in >>>$cache<<<" : '') . # "(pos $pos)" # ); # # TODO try a little harder: # # 1. search for just $string, $lwing, or $rwing? # # 2. 0,1,2,... and -1,-2,-3,... word search? # return(); # } # 0 and WARN("\n\nindex found ", # join(", ", map({length($_)} $l, $s, $r, $cache)), "|", # join(", ", # $pos, $pos+length($l), # $pos+length($l.$s), $pos+length($l.$s.$r) # )); # my @len = map({length($_)} $l, $s, $r); # my ($spos, $epos) = ( # $pos + $len[0], # $pos + $len[0] + $len[1] # ); # ###################################################################### my @matches = _context_match(\$cache, $l, $s, $r); unless(@matches) { L->warn("no matches"); return(); } # XXX "too many" is an error, but we need to leave it as non-fatal at # least to accomodate the (currently) incompetent widgets (@matches > 1) and L->warn("too many matches"); my ($spos, $epos) = @{$matches[0]}; # adjust $node for tightness my $range = $self->reduce_word_scope($node, $spos, $epos); # turn into a selection my $selection = dtRdr::Selection->claim($range); # set the selected text property on it $selection->set_selected(substr($cache, $spos, $epos - $spos)); # and a context based on the reduced-scope node # TODO oops, that's going to require some math here to avoid calling # get_content on that node :-/ # $selection->set_context([$left,$right]); return($selection); } # end subroutine locate_string definition ########################################################################
sub _context_match { my ($str, $li, $si, $ri) = @_; if(my $ref = ref($str)) { ($ref eq 'SCALAR') or croak("reference, but not a scalar ($ref)"); } else { # I guess make a copy my $var = $str; $str = \$var; } my ($l, $s, $r) = ($li, $si, $ri); # prep them foreach my $v ($l, $s, $r) { $v =~ s/\s//g; L('search')->debug("now $v\n"); $v = join('\\s*', map({s/([^a-zA-Z0-9_-])/\\$1/; $_} split(//, $v))); L('search')->debug("done $v\n"); } L('search')->debug("search $l|$s|$r\n"); my @starts; my @ends; while($$str =~ m/($l)\s*($s)\s*($r)/g) { my @s = @-; my @e = @+; # we need to shift() each of these since the first will be the # entire match shift(@s); shift(@e); (@$_ == 3) or die for(\@s, \@e); L('search')->debug("found ", join("|", map({join(",", @$_)} \@s, \@e))); push(@starts, $s[1]); push(@ends, $e[1]); } @starts or return(); (@starts == @ends) or die "something went wrong there @starts @ends"; my @matches = map({[$starts[$_], $ends[$_]]} 0..$#starts); L('search')->debug(scalar(@matches), " matches"); return(@matches); } # end subroutine _context_match definition ########################################################################
sub reduce_word_scope { my $self = shift; my ($node, $spos, $epos) = @_; my ($s,$e) = ($spos, $epos); my $nstart = $node->word_start; $_ += $nstart for($s,$e); my $found; foreach my $d (reverse($node->descendants)) { # other book classes need to deal with black holes defined($d->word_start) or die $d->id, " node did not get an aot entry!"; if(($d->word_start <= $s) and ($d->word_end >= $e)) { $found = $d; last; } } if($found) { # now adjust $spos, $epos correctly my $delta = $found->word_start - $node->word_start; # not correct $spos = $spos - $delta; $epos = $epos - $delta; } else { # positions are golden $found = $node; } return(dtRdr::Range->create(node => $found, range => [$spos, $epos])); } # end subroutine reduce_word_scope definition ########################################################################
sub has_cached_node_characters { my $self = shift; my ($node) = @_; return(exists($self->{cache_chars}{$node->id})); } # end subroutine has_cached_node_characters definition ########################################################################
sub cache_node_characters { my $self = shift; my ($node, $chars) = @_; eval {$node->isa('dtRdr::TOC')} or Carp::confess('not a TOC object'); # XXX this caching mechanism will turn into a memory leak => do # something like close_book() or else have it be a short # (maybe three-element) array. $self->{cache_chars}{$node->id} = $chars; } # end subroutine cache_node_characters definition ########################################################################
sub get_cache_chars { my $self = shift; my ($toc) = @_; eval {$toc->isa('dtRdr::TOC')} or croak('not a TOC object (usage: $book->get_cache_chars($node);)'); my $id = $toc->id; # you should only call this if you know they are there croak("no cache for node:'$id'") unless($self->has_cached_node_characters($toc)); return($self->{cache_chars}{$id}); } # end subroutine get_cache_chars definition ########################################################################
sub get_node_characters { my $self = shift; my ($node) = @_; if($self->has_cached_node_characters($node)) { return($self->get_cache_chars($node)); } else { my $chars = $self->create_node_characters($node); $self->cache_node_characters($node, $chars); return($chars); } } # end subroutine get_node_characters definition ########################################################################
sub create_node_characters { my $self = shift; my ($node) = @_; my $content = $self->get_trimmed_content($node); $content or die("got no content for node: ", $node->id); my $chars; # NOTE the difference is 33.8s vs 20.0s for dr_search perl.book 'open' if(0) { # these are not the slow droids you're looking for my $twig = XML::Twig->new( keep_spaces => 1 ); eval { $twig->parse($content); }; $@ and die("parse failed: '$@'"); $chars = $twig->root->text; # make it align with our typical cache $chars =~ s/&/&/g; $chars =~ s/</</g; } else { # this is slightly faster, but maybe still not correct # NOTE particularly, we seem to have trouble with various forms of # whitespace (probably causes some position issues.) # ALSO unless we get the utf8 magic straight, HTML::Entities will # get it all wrong # NOTE we don't support character declarations doing it like this -- # but we would need to weld that into $content to make twig happy # above anyway. $chars = $content; $chars =~ s/<[^>]+>//g; require HTML::Entities; require Encode; # make sure it is unicode $chars = Encode::decode_utf8($chars); # may want to check that at some point #Encode::is_utf8($chars) or die "ack"; #utf8::valid($chars) or die "ack"; # now de-entity it all $chars = HTML::Entities::decode_entities($chars); my $kill_spaces = '' . chr(160) . # is /  "\x{85}" . "\x{2028}\x{2029}" . ''; $chars =~ s/[$kill_spaces]+/ /g; $chars =~ s/&/&/g; $chars =~ s/</</g; } if((not $node->is_root) and $self->whitespace_before($node)) { $chars =~ s/^\s+//; } $chars =~ s/\s+/ /g; return($chars); } # end subroutine create_node_characters definition ########################################################################
# plain accessor
sub find_toc { my $self = shift; my ($id) = @_; return($self->toc->get_by_id($id)); } # end subroutine find_toc definition ########################################################################
sub descendant_nodes { my $self = shift; my ($node) = @_; return($node->descendants); } # end subroutine descendant_nodes definition ########################################################################
sub ancestor_nodes { my $self = shift; my ($node) = @_; return($node->ancestors); } # end subroutine ancestor_nodes definition ########################################################################
sub visible_nodes { my $self = shift; my $toc = $self->toc or croak('no toc'); my @vis; $toc->rmap(sub { my ($n) = @_; $n->visible and push(@vis, $n); }); return(@vis); } # end subroutine visible_nodes definition ########################################################################
sub xml_callbacks { my $self = shift; my %callbacks = $self->_standard_xml_callbacks; # rename the _content_node handlers foreach my $set (keys(%callbacks)) { if(my $item = delete($callbacks{$set}{_content_node})) { if($self->can('XML_CONTENT_NODE')) { my $node_name = $self->XML_CONTENT_NODE; exists($callbacks{$set}{$node_name}) and croak("cannot have '$node_name' as your XML_CONTENT_NODE"); $callbacks{$set}{$node_name} = $item; } } } # TODO the other part # my %child_says = $self->augment_xml_callbacks; # XXX actually won't have a use for that yet return(%callbacks); } # end subroutine xml_callbacks definition ########################################################################
sub _standard_xml_callbacks { my $self = shift; # only do this once per xml run my $copy_image = $self->get_callbacks->img_src_rewrite( $self->get_callbacks->core_link('dr_copy_link.png'), $self ); return( start => { 'img' => sub { # TODO only if there's an img_src_rewrite installed my $self = shift; my %params = @_; my $str = $params{during}; if(my ($s, $src, $e) = $$str =~ m/(.*src=")([^"]+)(.*)/) { $src = $self->get_callbacks->img_src_rewrite($src, $self); $$str = $s . $src . $e; } }, # img '_content_node' => sub { # is hopefully book-agnostic my $self = shift; my %params = @_; my $aft = $params{after}; my $node = $params{node}; my $toc = $node->get_by_id($params{attributes}{id}); $toc or die "cannot find a toc for this subnode"; if($toc->copy_ok) { my $copy_link = '<a class="dr_copy" ' . 'href="' . URI->new('dr://LOCAL/'. $toc->id . '.copy')->as_string . '">' . '<img class="dr_copy" src="' . $copy_image . '" /></a>'; $$aft .= $copy_link; } }, # _content_node 'a' => sub { my $self = shift; my %params = @_; my $str = $params{during}; if(my ($s, $href, $e) = $$str =~ m/(.*href=")([^"]+)(.*)/) { my $refsub = sub { # TODO is its own callback? my ($ref) = @_; my $render_id = $self->id; if($ref =~ m/^pkg:/) { # explicit, but maybe dirty my $uri = URI->new($ref); my $auth = $self->_id_escape( URI::Escape::uri_unescape($uri->authority) # bah ); my $rstring = 'pkg://' . $auth . $uri->path; return($rstring); } elsif($ref !~ m/^\w{3,6}:/) { # relative link my $rstring = 'pkg://'. $render_id . '/'. URI->new($ref)->as_string; return($rstring); } #WARN "unhandled link $ref"; return; }; $href = $refsub->($href); defined($href) or return; # don't mess with it $$str = $s . $href . $e; } }, # a }, # start end => {}, ); } # end subroutine _standard_xml_callbacks definition ######################################################################## ######################################################################## { # Zombie package package dtRdr::Book::Zombie; use warnings; use strict; use Carp; use base 'dtRdr::Book';
sub new { my $package = shift; (@_%2) and croak("odd number of elements in argument hash"); my (%args) = @_; defined($args{id}) or croak("must have id argument for zombie"); my $self = $package->SUPER::new(@_); $self->set_id($args{id}); $self->{toc} = dtRdr::Book::Zombie::TOC->new(book => $self); return($self); } # end subroutine new definition ######################################################################## # TODO is_alive accessor/constant? # kill/revive methods? { package dtRdr::Book::Zombie::TOC; use Class::Accessor::Classy; with 'new'; ro 'id'; ro 'book'; no Class::Accessor::Classy; sub isa { my $self = shift; my ($ask) = @_; return(1) if($ask eq 'dtRdr::TOC'); return $self->SUPER::isa($ask); } sub get_by_id { my $self = shift; my ($id) = @_; return((ref($self) || $self)->new(id => $id, book => $self->book)); } } # end toc } # end Zombie ########################################################################
1; # vim:ts=2:sw=2:et:sta