| Chemistry-Mok documentation | Contained in the Chemistry-Mok distribution. |
Chemistry::Mok - molecular awk interpreter
use Chemistry::Mok;
$code = '/CS/g{ $n++; $l += $match->bond_map(0)->length }
END { printf "Average C-S bond length: %.3f\n", $l/$n; }';
my $mok = Chemistry::Mok->new($code);
$mok->run({ format => mdlmol }, glob("*.mol"));
This module is the engine behind the mok program. See mok(1) for a detailed description of the language. Mok is part of the PerlMol project, http://www.perlmol.org.
Compile the code and return a Chemistry::Mok object. Available options:
packageIf the package option is given, the code runs in the
Chemistry::Mok::UserCode::$options{package} package instead of the
Chemistry::Mok::UserCode::Default package. Specifying a package name is
recommended if you have more than one mok object and you are using global
varaibles, in order to avoid namespace clashes.
pattern_formatThe name of the format which will be used for parsing slash-delimited patterns that don't define an explicit format. Mok versions until 0.16 only used the 'smiles' format, but newer versions can use other formats such as 'smarts', 'midas', 'formula_pattern', and 'sln', if available. The default is 'smarts'.
Run the code on the filenames contained in @args. $options is a hash reference with runtime options. Available options:
Generate 3D coordinates using Chemistry::3DBuilder.
"Aromatize" each molecule as it is read. This is needed for example for matching SMARTS patterns that use aromaticity or ring primitives.
Delete dummy atoms after reading each molecule. A dummy atom is defined as an atom with an unknown symbol (i.e., it doesn't appear on the periodic table), or an atomic number of zero.
If set to a true value, find bonds. Use it when reading files with no bond information but 3D coordinates to detect the bonds if needed (for example, if you want to do match a pattern that includes bonds). If the file has explicit bonds, mok will not try to find the bonds, but it will reassign the bond orders from scratch.
The format used when calling $mol_class->read. If not given, $mol_class->read tries to identify the format automatically.
The molecule class used for reading the files. Defaults to Chemistry::Mol.
0.25
Ivan Tubert-Brohman <itub@cpan.org>
Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Chemistry-Mok documentation | Contained in the Chemistry-Mok distribution. |
package Chemistry::Mok; $VERSION = '0.25'; # $Id: Mok.pm,v 1.10 2005/05/16 21:54:21 itubert Exp $ use strict; use warnings; use Chemistry::Mol; use Chemistry::File ':auto'; use Chemistry::Pattern; use Chemistry::Bond::Find qw(find_bonds assign_bond_orders); use Chemistry::Ring 'aromatize_mol'; use Chemistry::3DBuilder 'build_3d'; use Text::Balanced ':ALL'; use Scalar::Util 'blessed'; use Data::Dumper; use Carp; our $DEBUG = 0;
sub tokenize { my ($self, $code) = @_; $code =~ s/\s*$//; # Text::Balanced complains about trailing whitespace #$code =~ s/^\s*#.*//g; # remove comments at the top of the file #unless($code =~ /^\s*([\/{#]|sub|BEGIN|END)/) { unless($code =~ /^(\s*#.*)*\s*([\/{]|sub|BEGIN|END|\w+:\s*\/)/) { print "MOK: adding implicit braces\n" if $DEBUG; $code = "{$code}"; # add implicit brackets for simple one-liners } #print "code = '$code'\n"; # (patt opt?)? code | sub code my @toks = extract_multiple(my $c = $code, [ { 'Chemistry::Mok::Comment' => qr/\s*#.*\s*/ }, { 'Chemistry::Mok::Patt' => sub { scalar extract_delimited($_[0],'/') } }, { 'Chemistry::Mok::Sub' => qr/\s*(?:END|BEGIN|sub\s+\w+)\s*/ }, { 'Chemistry::Mok::Block' => sub { scalar extract_codeblock($_[0],'{') } }, { 'Chemistry::Mok::PattLang' => qr/(\s*\w+):(?=\s*\/)/ }, { 'Chemistry::Mok::Opts' => qr/[gopGOP]+/ }, ], ); die "Mok: error extracting: $@" if $@; print "MOK: TOKENS:\n", Dumper(\@toks), "\nCODE:<<<<$code>>>>\n\n" if $DEBUG; @toks; } sub parse { my ($self, @toks) = @_; my (@subs, @blocks); for my $tok (@toks) { blessed $tok or die "unparsable token '$tok'\n"; } ### new parser my $st = 1; my ($patt, $opts, $block, $sub, $pattlang) = ('') x 5; my ($save) = 0; my $line; my $next_line = 1; while (my $tok = shift @toks) { $line = $next_line; $next_line += $$tok =~ y/\n//; print "MOK: LINE=$line;\nTOK=<<<<$$tok>>>>;\nNEXT_LINE=$next_line\n\n" if $DEBUG; next if $tok->isa("Chemistry::Mok::Comment"); if ($st == 1) { if ($tok->isa("Chemistry::Mok::Block")){ $block = $$tok, $save = 1; } elsif ($tok->isa("Chemistry::Mok::Sub")) { $sub = $$tok, $st = 5, next; } elsif ($tok->isa("Chemistry::Mok::PattLang")) { $pattlang = $$tok, $st = 4, next; } elsif ($tok->isa("Chemistry::Mok::Patt")) { $patt = $$tok, $st = 2, next; } } elsif ($st == 2) { if ($tok->isa("Chemistry::Mok::Block")){ $block = $$tok, $save = 1; } elsif ($tok->isa("Chemistry::Mok::Opts")){ $opts = $$tok, $st = 3, next; } } elsif ($st == 3) { if ($tok->isa("Chemistry::Mok::Block")){ $block = $$tok, $save = 1; } } elsif ($st == 4) { if ($tok->isa("Chemistry::Mok::Patt")){ $patt = $$tok, $st = 2, next; } } elsif ($st == 5) { if ($tok->isa("Chemistry::Mok::Block")){ $block = $$tok, $save = 1; } } else { confess "unknown state '$st'"; } if ($save) { # save block and go back to state 1 if ($sub) { push @subs, { block => "$sub $$tok", line => $line }; } else { push @blocks, { patt => $patt, opts => $opts, pattlang => $pattlang, block => $$tok, line => $line}; } $patt = $opts = $pattlang = $block = $sub = ''; $st = 1, $save = 0, next; } else { die "unexpected token '$$tok' (type '" . ref($tok) . "'\n"; } } print "MOK: BLOCKS\n", Dumper(\@blocks), "\nSUBS:\n", Dumper(\@subs), "\n" if $DEBUG; \@subs, \@blocks; } sub compile_subs { my ($self, @subs) = @_; my $pack = $self->{package}; for my $sub (@subs) { my $code = <<END; package Chemistry::Mok::UserCode::$pack; no strict; no warnings; #line $sub->{line} "mok code" $sub->{block} END print "MOK: COMPILING SUB: <<<<$code>>>>\n\n" if $DEBUG; eval $code; die "Mok: error compiling sub: $@" if $@; } } sub compile_blocks { my ($self, @blocks) = @_; my $pack = $self->{package}; my $format = $self->{pattern_format}; my @compiled_blocks; for my $block (@blocks) { #use Data::Dumper; print Dumper $block; my $code = <<END; package Chemistry::Mok::UserCode::$pack; no strict; no warnings; sub { my (\$mol, \$file, \$match, \$patt) = \@_; my (\$MOL, \$FILE, \$MATCH, \$PATT, \$FH) = \@_; my (\@A) = \$MATCH ? \$MATCH->atom_map : \$MOL->atoms; my (\@B) = \$MATCH ? \$MATCH->bond_map : \$MOL->bonds; #line $block->{line} "mok code" $block->{block}; } END print "MOK: COMPILING BLOCK: <<<<$code>>>>\n\n" if $DEBUG; my $sub = eval $code; die "Mol: Error compiling block: $@" if $@; my ($patt, $patt_str); if ($block->{patt}) { $block->{patt} =~ m#^/(.*)/$#; $patt_str = $1; $patt = Chemistry::Pattern->parse($patt_str, format => $block->{pattlang} || $format); $patt->attr(global => 1) if $block->{opts} =~ /g/; $patt->options(overlap => 0) if $block->{opts} =~ /O/; $patt->options(permute => 1) if $block->{opts} =~ /p/; } push @compiled_blocks, {'sub' => $sub, patt => $patt, patt_str => $patt_str}; } \@compiled_blocks; }
sub new { my ($class, $code, @a) = @_; my %opts; # for backwards compatibility with Chemistry::Mok->new($code, $package) unshift @a, "package" if (@a == 1); %opts = @a; my $self = bless { 'package' => $opts{package} || "Default", pattern_format => $opts{pattern_format} || "smarts", }, $class; $self->setup_package; my @toks = $self->tokenize($code); my ($subs, $blocks) = $self->parse(@toks); $self->compile_subs(@$subs); $self->{blocks} = $self->compile_blocks(@$blocks); return $self; } sub setup_package { my ($self) = @_; my $usr_pack = $self->{package}; # import convenience functions into the user's namespace eval <<EVAL; package Chemistry::Mok::UserCode::$usr_pack; use Chemistry::Atom ':all'; use Chemistry::Ring ':all'; use Chemistry::Ring::Find ':all'; use Chemistry::Bond::Find ':all'; use Chemistry::Canonicalize ':all'; use Chemistry::InternalCoords::Builder ':all'; use Chemistry::Isotope ':all'; use Math::VectorReal ':all'; use Chemistry::3DBuilder ':all'; sub println { print "\@_", "\n" } EVAL die "Mok: error setting up 'Chemistry::Mok::UserCode::$usr_pack' $@" if $@; }
sub run { my ($self, $opt, @args) = @_; # MAIN LOOP my $mol_class = $opt->{mol_class} || "Chemistry::Mol"; FILE: for my $file (@args) { #my (@mols) = $mol_class->read( my %reader_opts = ( format => $opt->{format}, mol_class => $opt->{mol_class}, ); my $reader = $mol_class->file( $file, %reader_opts, ); $reader->open('<'); $reader->read_header; while (my @mols = $reader->read_mol($reader->fh, %reader_opts)) { MOL: for my $mol (@mols) { if ($opt->{delete_dummies}) { $_->delete for grep { ! $_->Z } $mol->atoms; } if ($opt->{find_bonds}) { find_bonds($mol) unless $mol->bonds; assign_bond_orders($mol); } if ($opt->{aromatize}) { aromatize_mol($mol); } if ($opt->{build_3d}) { build_3d($mol); } BLOCK: for my $block (@{$self->{blocks}}) { my ($code_block, $patt, $patt_str) = @{$block}{qw(sub patt patt_str)}; if ($patt) { MATCH: while ($patt->match($mol)) { $code_block->($mol, $file, $patt, $patt_str, $reader->fh); last unless $patt->attr('global'); } } else { $code_block->($mol, $file, $patt, $patt_str, $reader->fh); } } } } } } 1; __END__