|
From: <per...@li...> - 2006-02-27 17:42:24
|
Update of /cvsroot/perl-flat/flat4cpan/lib/FLAT/FA In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31851/lib/FLAT/FA Modified Files: DFA.pm NFA.pm PFA.pm PRE.pm RE.pm Log Message: cleaning up comments Index: DFA.pm =================================================================== RCS file: /cvsroot/perl-flat/flat4cpan/lib/FLAT/FA/DFA.pm,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** DFA.pm 24 Feb 2006 21:27:15 -0000 1.3 --- DFA.pm 27 Feb 2006 17:42:18 -0000 1.4 *************** *** 1,21 **** # $Revision$ $Date$ $Author$ - =head1 NAME - - DFA - A determinisitic finite automata base class - - =head1 SYNOPSIS - - use DFA; - - =head1 DESCRIPTION - - This module is implements a deterministic finite automata, - including the testing of strings accepted by the DFA. - - B<Methods> - - =cut - package FLAT::FA::DFA; --- 1,4 ---- *************** *** 28,37 **** use Data::Dumper; - =item C<new> - - Returns a new FA object - - =cut - sub new { my $class = shift; --- 11,14 ---- *************** *** 45,60 **** } - - =over 1 - - =item C<jump_start> - - Returns an DFA with 1 start state, one final state, and - a single transistion on the given symbol; if a symbol is - not provided, only a single state that is both the start - and end state is returned - - =cut - sub jump_start { my $self = shift; --- 22,25 ---- *************** *** 82,93 **** } - =over 1 - - =item C<load_file> - - Load DFA from file - - =cut - sub load_file { my $self = shift; --- 47,50 ---- *************** *** 104,113 **** } - =item C<load_string> - - Load DFA from string - - =cut - sub load_string { my $self = shift; --- 61,64 ---- *************** *** 144,155 **** } - =over 1 - - =item C<clone> - - Returns a distinct clone of self - - =cut - sub clone { my $self = shift; --- 95,98 ---- *************** *** 167,175 **** } - =item C<to_nfa> - - Returns an NFA; everything is copied exactly, but it is of class NFA - =cut - sub to_nfa { my $self = shift; --- 110,113 ---- *************** *** 187,197 **** } - =item C<minimize> - - Minimizes number of states and transitions required to accept the regular language - described by original $self; returns an array of the states removed - - =cut - sub minimize() { my $self = shift; --- 125,128 ---- *************** *** 334,345 **** } - =item C<delete_state> - - Deletes state and all references to it...if the start start or only final state is deleted, a loud warning, - messages will be issued; accepts an arbitrarily long list of states to delete, so you could - do $dfa->delete_state(@states) or $dfa->delete_state($state); - - =cut - sub delete_state { my $self = shift(); --- 265,268 ---- *************** *** 386,396 **** } - =item C<rename_states> - - Renames a single state; warns and changes nothing if name conflicts - with another state - - =cut - sub rename_state { my $self = shift; --- 309,312 ---- *************** *** 448,457 **** } - =item C<rename_symbol> - - Renames symbol in dfa - - =cut - # Adds symbol sub rename_symbol { --- 364,367 ---- *************** *** 487,496 **** } - =item C<add_transition> - - Adds transition - note, one state per char...no subsets, that is what NFA is for :) - - =cut - sub add_transition { my $self = shift; --- 397,400 ---- *************** *** 501,511 **** } - =item C<get_transition_on> - - Get transitions on input symbol from specified state; this - is not in FA.pm as it is DFA specific. - - =cut - sub get_transition_on { my $self = shift; --- 405,408 ---- *************** *** 521,530 **** } - =item C<reverse_dfa> - - Will return a DFA object that is the reverse of the current one. - - =cut - sub reverse_dfa { my $self = shift; --- 418,421 ---- *************** *** 532,542 **** } - =item C<to_gdl> - - Outputs a Graph Description Language (GDL) file that can be visualized - using the program aiSee http://www.aisee.com/gdl/nutshell/intro.htm - - =cut - sub to_gdl { my $self = shift; --- 423,426 ---- *************** *** 560,569 **** } - =item C<info> - - Diagnostics, transition information - - =cut - sub info { my $self = shift; --- 444,447 ---- *************** *** 598,608 **** } - =item C<serialize> - - Prints a valid string suitable for an input to stdout - - =cut - - sub serialize { my $self = shift; --- 476,479 ---- *************** *** 621,630 **** } - =item C<is_valid> - - Gives true (1) or false (0) as to if a string is valid - - =cut - sub is_valid { my $self = shift; --- 492,495 ---- *************** *** 660,669 **** } - =item C<get_last_state> - - Returns accepting state, undef if string is not valid - - =cut - sub get_last_state { my $self = shift; --- 525,528 ---- *************** *** 693,702 **** } - =item C<get_path> - - Returns path taken - - =cut - sub get_path { my $self = shift; --- 552,555 ---- *************** *** 727,748 **** } - =item C<generate_random> - - Generates random DFA; not implemented - - =cut - sub generate_random { my $self = shift; } - - =item C<pump_strings> - - Generates accepted strings based on string length - closures will not be repeated, but will be denoted with '*'; not implemented. - - =cut - sub pump_strings { my $self = shift; --- 580,587 ---- *************** *** 750,759 **** } - =item C<init_string_pump> - - Used to initiate iterative string pumping; - - =cut - sub init_string_pump { my $self = shift; --- 589,592 ---- *************** *** 761,770 **** } - =item C<pump_next> - - Used to get next pumped string when being used iteratively; - - =cut - sub pump_next { my $self = shift; --- 594,597 ---- *************** *** 774,778 **** 1; ! =back =head1 AUTHOR --- 601,618 ---- 1; ! __END__ ! ! =head1 NAME ! ! DFA - A determinisitic finite automata base class ! ! =head1 SYNOPSIS ! ! use DFA; ! ! =head1 DESCRIPTION ! ! This module is implements a deterministic finite automata, ! including the testing of strings accepted by the DFA. =head1 AUTHOR Index: PFA.pm =================================================================== RCS file: /cvsroot/perl-flat/flat4cpan/lib/FLAT/FA/PFA.pm,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** PFA.pm 24 Feb 2006 21:27:15 -0000 1.3 --- PFA.pm 27 Feb 2006 17:42:18 -0000 1.4 *************** *** 1,27 **** # $Revision$ $Date$ $Author$ - =head1 NAME - - PFA - A parallel finite automata base class - - =head1 SYNOPSIS - - use PFA; - - =head1 DESCRIPTION - - This module is implements a paralle finite automata, - and the conversion of such to a non deterministic - finite automata; - - One key between PFA implementation an PFA & DFA is that the PFA - may contain more than one start node since it may depict - threads of concurrent execution. The main purpose of this - module is to convert a PFA to an PFA. - - B<Methods> - - =cut - package FLAT::FA::PFA; --- 1,4 ---- *************** *** 33,42 **** use Data::Dumper; - =item C<new> - - Returns a new PFA object - - =cut - sub new { my $class = shift; --- 10,13 ---- *************** *** 56,67 **** } - =over 1 - - =item C<load_file> - - Creates PFA from file - - =cut - sub load_file { my $self = shift; --- 27,30 ---- *************** *** 78,87 **** } - =item C<load_string> - - Creates PFA from string - - =cut - sub load_string { my $self = shift; --- 41,44 ---- *************** *** 125,138 **** } - =over 1 - - =item C<jump_start> - - Returns an PFA with 1 start state, one final state, and - a single transistion on the given symbol; if a symbol is - not provided, epsilon is used. - - =cut - sub jump_start { my $self = shift; --- 82,85 ---- *************** *** 158,171 **** } - =over 18 - - =item C<find_tied> - - Determines the tied nodes in $self, and stores them in $self->{_TIED} for - easy lookup. Use $self->is_tied(@testset) to determine if a set of nodes - contains anything tied on lambda - - =cut - sub find_tied { my $self = shift; --- 105,108 ---- *************** *** 188,199 **** } - =over 18 - - =item C<get_tied> - - Determines if any tied nodes are contained in the provided node set - - =cut - sub get_tied { my $self = shift; --- 125,128 ---- *************** *** 201,212 **** } - =over 18 - - =item C<has_tied> - - Determines if all nodes in a tied set are contained in the provided node set - - =cut - sub has_tied { my $self = shift; --- 130,133 ---- *************** *** 232,241 **** } - =item C<extract_tied> - - Extracts all tied nodes in a set and returns a flattened array - - =cut - sub extract_tied { my $self = shift; --- 153,156 ---- *************** *** 263,272 **** } - =item C<to_nfa> - - To NFA Stuff - convert PFA to NFA using subset construction technique - - =cut - sub to_nfa { my $self = shift; --- 178,181 ---- *************** *** 350,361 **** } - =over 18 - - =item C<serialize_name> - - Creates a string based on the items in the array; - - =cut - sub serialize_name { my $self = shift; --- 259,262 ---- *************** *** 365,376 **** } - =over 18 - - =item C<set_start> - - Sets start node, calls FA->add_node - - =cut - sub set_start { my $self = shift; --- 266,269 ---- *************** *** 384,393 **** } - =item C<get_start> - - Returns start nodes - - =cut - sub get_start { my $self = shift; --- 277,280 ---- *************** *** 395,406 **** } - =over 18 - - =item C<set_active> - - Sets active node, calls FA->add_node - - =cut - sub set_active { my $self = shift; --- 282,285 ---- *************** *** 410,419 **** } - =item C<get_active> - - Returns active nodes - - =cut - sub get_active { my $self = shift; --- 289,292 ---- *************** *** 421,430 **** } - =item C<add_node> - - Adds a node label to the node array if it does not already exist (handles dups) - - =cut - sub add_node { my $self = shift; --- 294,297 ---- *************** *** 437,447 **** } - =item C<get_nodes> - - Returns the array of all nodes - - =cut - - # Returns array of nodes sub get_nodes { my $self = shift; --- 304,307 ---- *************** *** 449,459 **** } - =item C<add_transition> - - Adds a transition - adds node and symbol as well - unique nodes and symbols - enforced by PFA->add_node and PFA->add_symbol - - =cut - sub add_transition { my $self = shift; --- 309,312 ---- *************** *** 469,480 **** } - =item C<get_transition_on> - - Returns transition(s), as an array, for a specific node - on a specific input symbol; This is not in FA.pm as it is - PFA specific. - - =cut - sub get_transition_on { my $self = shift; --- 322,325 ---- *************** *** 490,500 **** } - =item C<is_start> - - Checks to see if given node is in the start node array - - =cut - - # Will test if the string passed to it is a node in the set of start nodes sub is_start { my $self = shift; --- 335,338 ---- *************** *** 502,511 **** } - =item C<set_epsilon> - - Sets epsilon symbol - not working with Perl special vars - $,@,%, etc - - =cut - sub set_epsilon { my $self = shift; --- 340,343 ---- *************** *** 515,524 **** } - =item C<get_epsilon_symbol> - - Returns epsilon symbol - - =cut - sub get_epsilon_symbol { my $self = shift; --- 347,350 ---- *************** *** 526,535 **** } - =item C<get_epsilon_transitions> - - Returns all epsilon transitions for a particular node; node must exist - - =cut - sub get_epsilon_transitions { my $self = shift; --- 352,355 ---- *************** *** 544,553 **** } - =item C<delete_epsilon> - - Deletes epsilon symbol - - =cut - sub delete_epsilon { my $self = shift; --- 364,367 ---- *************** *** 556,565 **** } - =item C<set_lambda> - - Sets lambda symbol - not working with Perl special vars - $,@,%, etc - - =cut - sub set_lambda { my $self = shift; --- 370,373 ---- *************** *** 569,578 **** } - =item C<get_lambda_symbol> - - Returns lambda symbol - - =cut - sub get_lambda_symbol { my $self = shift; --- 377,380 ---- *************** *** 580,589 **** } - =item C<get_lambda_transitions> - - Returns all lambda transitions for a particular node; node must exist - - =cut - sub get_lambda_transitions { my $self = shift; --- 382,385 ---- *************** *** 598,607 **** } - =item C<delete_lambda> - - Deletes lambda symbol - - =cut - sub delete_lambda { my $self = shift; --- 394,397 ---- *************** *** 610,620 **** } - =item C<is_node> - - Tests if given string is the name of a node - - =cut - - # Will test if the string passed to it is the same as a label of any node sub is_node { my $self = shift; --- 400,403 ---- *************** *** 622,633 **** } - =item C<add_final> - - Adds a list of node lables to the final nodes array; handles dups, - and ensures nodes are added to set of nodes - - =cut - - # Adds node to final (accepting) node stack sub add_final { my $self = shift; --- 405,408 ---- *************** *** 640,650 **** } - =item C<get_final> - - Returns the array of all final nodes - - =cut - - # Returns array of final nodes sub get_final { my $self = shift; --- 415,418 ---- *************** *** 652,663 **** } - - =item C<is_final> - - Checks to see if given node is in the final node array - - =cut - - # Will test if the string passed to it is the same as a label of any node sub is_final { my $self = shift; --- 420,423 ---- *************** *** 665,676 **** } - =over 1 - - =item C<clone> - - Returns a distinct clone of self - - =cut - sub clone { my $self = shift; --- 425,428 ---- *************** *** 690,700 **** } - =item C<append_pfa> - - Concatenates start node of given PFA to the common final node of self; - usage: my PFA->append_pfa($PFA1); - - =cut - sub append_pfa { my $self = shift; --- 442,445 ---- *************** *** 734,744 **** } - =item C<prepend_pfa> - - Concatenates start node of self to the common final node of the given PFA; - usage: my PFA->prepend_pfa($PFA1); - - =cut - sub prepend_pfa { my $self = shift; --- 479,482 ---- *************** *** 778,788 **** } - =item C<or_pfa> - - Creates a common start node joined by an epsilon transition; - usage: my PFA->or_pfa($PFA1); - - =cut - sub or_pfa { my $self = shift; --- 516,519 ---- *************** *** 826,835 **** } - =item C<interleave_pfa> - - Joins 2 PFAs using an interleave (concurrent process). - - =cut - sub interleave_pfa { my $self = shift; --- 557,560 ---- *************** *** 886,896 **** } - =item C<kleene> - - Wraps given PFA in Kleene Closure - usage: my PFA->kleene(); - - =cut - sub kleene { my $self = shift; --- 611,614 ---- *************** *** 918,929 **** } - =item C<pinch> - - Creates epsilon transitions from all final nodes to a common final node; . - usage: my $PFA3 = PFA->pinch_pfa($PFA1); does nothing if there is only one - final node - - =cut - sub pinch { my $self = shift; --- 636,639 ---- *************** *** 948,958 **** } - =item C<rename_nodes> - - Renames a single node; warns and changes nothing if name conflicts - with another node - - =cut - sub rename_node { my $self = shift; --- 658,661 ---- *************** *** 1032,1048 **** } - =item C<ensure_unique_nodes> - - Compares the names of the nodes in $self and the - provided FA, and only renames a node (in $self) - if a name collision is detected; if the disambiguation - string causes a new collision, a random string is created - using crypt() until there is no collision detected - - Usage: - $self->ensure_unique_nodes($PFA1,'string_to_disambiguate'); - - =cut - sub ensure_unique_nodes { my $self = shift; --- 735,738 ---- *************** *** 1063,1073 **** } - =item C<number_nodes> - - Numbers nodes 0-# of nodes; first appends node name with a - random string to avoid conflicts - - =cut - sub number_nodes { my $self = shift; --- 753,756 ---- *************** *** 1089,1098 **** } - =item C<append_node_names> - - Appends node names with the specified suffix - - =cut - sub append_node_names { my $self = shift; --- 772,775 ---- *************** *** 1109,1119 **** } - - =item C<prepend_node_names> - - Prepends node names with the specified prefix - - =cut - sub prepend_node_names { my $self = shift; --- 786,789 ---- *************** *** 1130,1139 **** } - =item C<rename_symbol> - - Renames symbol in pfa - - =cut - # renames symbol sub rename_symbol { --- 800,803 ---- *************** *** 1171,1180 **** } - =item C<info> - - Return string with info - - =cut - sub info { my $self = shift; --- 835,838 ---- *************** *** 1216,1225 **** } - =item C<serialize> - - Prints a valid string suitable for an input to stdout - - =cut - sub serialize { my $self = shift; --- 874,877 ---- *************** *** 1252,1256 **** 1; ! =back =head1 AUTHOR --- 904,927 ---- 1; ! __END__ ! ! =head1 NAME ! ! PFA - A parallel finite automata base class ! ! =head1 SYNOPSIS ! ! use PFA; ! ! =head1 DESCRIPTION ! ! This module is implements a paralle finite automata, ! and the conversion of such to a non deterministic ! finite automata; ! ! One key between PFA implementation an PFA & DFA is that the PFA ! may contain more than one start node since it may depict ! threads of concurrent execution. The main purpose of this ! module is to convert a PFA to an PFA. =head1 AUTHOR Index: NFA.pm =================================================================== RCS file: /cvsroot/perl-flat/flat4cpan/lib/FLAT/FA/NFA.pm,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** NFA.pm 21 Feb 2006 14:43:40 -0000 1.2 --- NFA.pm 27 Feb 2006 17:42:18 -0000 1.3 *************** *** 1,22 **** # $Revision$ $Date$ $Author$ - =head1 NAME - - NFA - A non deterministic finite automata base class - - =head1 SYNOPSIS - - use NFA; - - =head1 DESCRIPTION - - This module implements a non deterministic finite automata, - including support for epsilon transitions and conversion - to a deterministic finite automata. - - B<Methods> - - =cut - package FLAT::FA::NFA; --- 1,4 ---- *************** *** 28,38 **** use Data::Dumper; - - =item C<new> - - Returns a new FA object - - =cut - sub new { my $class = shift; --- 10,13 ---- *************** *** 47,60 **** } - =over 1 - - =item C<jump_start> - - Returns an NFA with 1 start state, one final state, and - a single transistion on the given symbol; if a symbol is - not provided, epsilon is used. - - =cut - sub jump_start { my $self = shift; --- 22,25 ---- *************** *** 81,93 **** } - - =over 1 - - =item C<load_file> - - Creates NFA from file - - =cut - sub load_file { my $self = shift; --- 46,49 ---- *************** *** 104,113 **** } - =item C<load_string> - - Creates NFA from string - - =cut - sub load_string { my $self = shift; --- 60,63 ---- *************** *** 146,157 **** } - =over 1 - - =item C<clone> - - Returns a distinct clone of self - - =cut - sub clone { my $self = shift; --- 96,99 ---- *************** *** 170,180 **** } - =item C<append_nfa> - - Concatenates start state of given NFA to the common final state of self; - usage: my NFA->append_nfa($NFA1); - - =cut - sub append_nfa { my $self = shift; --- 112,115 ---- *************** *** 216,226 **** } - =item C<prepend_nfa> - - Concatenates start state of self to the common final state of the given NFA; - usage: my NFA->prepend_nfa($NFA1); - - =cut - sub prepend_nfa { my $self = shift; --- 151,154 ---- *************** *** 263,273 **** } - =item C<or_nfa> - - Creates a common start state joined by an epsilon transition; - usage: my NFA->or_nfa($NFA1); - - =cut - sub or_nfa { my $self = shift; --- 191,194 ---- *************** *** 317,328 **** } - - =item C<kleene> - - Wraps given NFA in Kleene Closure - usage: my NFA->kleene(); - - =cut - sub kleene { my $self = shift; --- 238,241 ---- *************** *** 350,361 **** } - =item C<pinch> - - Creates epsilon transitions from all final states to a common final state; . - usage: my $NFA3 = NFA->pinch_nfa($NFA1); does nothing if there is only one - final state - - =cut - sub pinch { my $self = shift; --- 263,266 ---- *************** *** 380,389 **** } - =item C<reverse> - - Reverses an NFA - - =cut - sub reverse { my $self = shift; --- 285,288 ---- *************** *** 409,419 **** } - =item C<rename_states> - - Renames a single state; warns and changes nothing if name conflicts - with another state - - =cut - sub rename_state { my $self = shift; --- 308,311 ---- *************** *** 475,484 **** } - =item C<rename_symbol> - - Renames symbol in nfa - - =cut - # renames symbol sub rename_symbol { --- 367,370 ---- *************** *** 516,527 **** } - =item C<add_transition> - - Adds a transition - adds state and symbol as well - unique states and symbols - enforced by NFA->add_state and NFA->add_symbol; since this is an NFA, transitions - on $symbol from $state may be added to multiple destinations - - =cut - sub add_transition { my $self = shift; --- 402,405 ---- *************** *** 540,551 **** } - =item C<get_transition_on> - - Returns transition(s), as an array, for a specific state - on a specific input symbol; This is not in FA.pm as it is - NFA specific. - - =cut - sub get_transition_on { my $self = shift; --- 418,421 ---- *************** *** 561,570 **** } - =item C<set_epsilon> - - Sets epsilon symbol - not working with Perl special vars - $,@,%, etc - - =cut - sub set_epsilon { my $self = shift; --- 431,434 ---- *************** *** 574,583 **** } - =item C<get_epsilon_symbol> - - Returns epsilon symbol - - =cut - sub get_epsilon_symbol { my $self = shift; --- 438,441 ---- *************** *** 585,594 **** } - =item C<get_epsilon_transitions> - - Returns all epsilon transitions for a particular state; state must exist - - =cut - sub get_epsilon_transitions { my $self = shift; --- 443,446 ---- *************** *** 603,612 **** } - =item C<delete_epsilon> - - Deletes epsilon symbol - - =cut - sub delete_epsilon { my $self = shift; --- 455,458 ---- *************** *** 615,624 **** } - =item C<to_dfa> - - To DFA Stuff - convert NFA to DFA using subset construction technique - - =cut - sub to_dfa { my $self = shift; --- 461,464 ---- *************** *** 681,691 **** } - =item C<move> - - Get all transition states for the specific symbol - **candidate for anonymous sub in NFA->to_dfa() - - =cut - sub move { my $self = shift; --- 521,524 ---- *************** *** 712,722 **** } - =item C<epsilon_closure> - - Peform e-closure - get all the states that the provided subset of states transitions to on epsilon (empty string) - **candidate for anonymous sub in NFA->to_dfa() - - =cut - sub epsilon_closure { my $self = shift; --- 545,548 ---- *************** *** 745,754 **** } - =item C<info> - - Return string with info - - =cut - sub info { my $self = shift; --- 571,574 ---- *************** *** 783,792 **** } - =item C<serialize> - - Prints a valid string suitable for an input to stdout - - =cut - sub serialize { my $self = shift; --- 603,606 ---- *************** *** 812,831 **** } - =item C<generate_random> - - Generates random DFA; not implemented - - =cut sub generate_random { my $self = shift; } - =item C<generate_from_strings> - - Will be used to create an NFA that will accept @THIS, but - not @THAT - - =cut - sub generate_from_strings { my $self = shift; --- 626,633 ---- *************** *** 835,839 **** 1; ! =back =head1 AUTHOR --- 637,655 ---- 1; ! __END__ ! ! =head1 NAME ! ! NFA - A non deterministic finite automata base class ! ! =head1 SYNOPSIS ! ! use NFA; ! ! =head1 DESCRIPTION ! ! This module implements a non deterministic finite automata, ! including support for epsilon transitions and conversion ! to a deterministic finite automata. =head1 AUTHOR Index: PRE.pm =================================================================== RCS file: /cvsroot/perl-flat/flat4cpan/lib/FLAT/FA/PRE.pm,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** PRE.pm 24 Feb 2006 21:27:15 -0000 1.3 --- PRE.pm 27 Feb 2006 17:42:18 -0000 1.4 *************** *** 1,68 **** # $Revision$ $Date$ $Author$ - =head1 NAME - - PRE - A regular expression base class - - =head1 SYNOPSIS - - use PRE; - - =head1 DESCRIPTION - - This module implements a parallel regular expression - parser, and supports the conversion of a PRE to - a parallel finite automata. A homegrown recursive - descent parser is used to build the parse tree, and the method - used to convert the parallel regular expression to a PFA is a - modified Thompson's construction that accounts for the additional - interleave operator (&). - - Recursive Descent-safe Regex Grammar: - - R -> P - - P -> OP' - - P -> '&' OP' | epsilon - - O -> CO' - - O' -> '|' CO' | epsilon - - C -> SC' - - C' -> .SC' | epsilon - - S -> LS' - - S' -> *S' | epsilon - - L -> a | b | c |..| 0 | 1 | 2 |..| (R) | epsilon - - Terminal symbols: a,b,c,..,z,0,1,2,..,9,|,*,(,) - - NOTE: Concatenation operator, '.', is not a terminal symbol - and should not be included in the regex - - FAQ: - Q: Does this support Perl regular expressions? - A: No, just the regular expression using the terminal symbols - listed above. - - B<Valid terminal characters include:> - - I<a b c d e f g h i j k l m n o p q r s t u v w x y z> - - I<A B C D E F G H I J K L M N O P Q R S T U V W X Y Z> - - I<0 1 2 3 4 5 6 7 8 9 + - = ? [ ] { } . ~ ^ @ % $> - - I<: ; < >> - - B<Methods> - - =cut - package FLAT::FA::PRE; --- 1,4 ---- *************** *** 74,85 **** use Data::Dumper; - =over 1 - - =item C<new> - - Create a brand spaking new pRE object; does not accept regex here - - =cut - sub new { my $class = shift; --- 10,13 ---- *************** *** 107,117 **** } - =item C<set_epsilon> - - Not really for public consumption, but could be in the future. Defines - how epsilon (i.e., the null string) is represented in the parse tree. - - =cut - sub set_epsilon { my $self = shift; --- 35,38 ---- *************** *** 122,131 **** } - =item C<get_epsilon_symbol> - - Returns the string representation of the null string - - =cut - sub get_epsilon_symbol { my $self = shift; --- 43,46 ---- *************** *** 133,143 **** } - - =item C<set_pre> - - Defines a regular expression for RE to parse through - - =cut - sub set_pre { my $self = shift; --- 48,51 ---- *************** *** 174,184 **** } - - =item C<get_pre> - - Returns the regular expression set by C<set_pre> as a string - - =cut - sub get_pre { my $self = shift; --- 82,85 ---- *************** *** 186,196 **** } - =item C<set_current> - - Meant for private consumption. Initializes the stack used to store - what part of the regex has yet to be parsed - - =cut - sub set_current { my $self = shift; --- 87,90 ---- *************** *** 201,211 **** } - =item C<reset_current> - - Meant for private consumption. Initializes the stack used to store - what part of the regex has yet to be parsed - - =cut - sub reset_current { my $self = shift; --- 95,98 ---- *************** *** 214,223 **** } - =item C<get_current> - - Returns what is in the _CURRENT_STR stack - - =cut - sub get_current { my $self = shift; --- 101,104 ---- *************** *** 225,236 **** } - =item C<to_pfa> - - Not implemented, but will create an PFA using Thompson's Method; from there - one could do a PFA->to_dfa, and compare the resulting dfa to the one from - RE->to_dfa. - - =cut - sub to_pfa { my $self = shift; --- 106,109 ---- *************** *** 246,255 **** } - =item C<thompson> - - Guts of RE->to_pfa; uses depth first parse tree traversal - - =cut - sub thompson { my $self = shift; --- 119,122 ---- *************** *** 293,302 **** ################################################################ - =item C<parse> - - Parses regular expressin set by RE->set_pre, and stores the parse tree in _PARSE_TREE - - =cut - sub parse { my $self = shift; --- 160,163 ---- *************** *** 310,319 **** } - =item C<cat_endmarker> - - Adds '#', or the end of regex marker to the parse tree; not for public consumption - - =cut - sub cat_endmarker { my $self = shift; --- 171,174 ---- *************** *** 322,332 **** } - =item C<match> - - Matches current terminal symbol with terminal character, - and loads up the next lookahead character - - =cut - sub match { my $self = shift; --- 177,180 ---- *************** *** 344,353 **** } - =item C<lookahead> - - Returns value of current lookahead - - =cut - sub lookahead { my $self = shift; --- 192,195 ---- *************** *** 355,364 **** } - =item C<nexttoken> - - Sets next token as lookahead - - =cut - sub nexttoken { my $self = shift; --- 197,200 ---- *************** *** 370,379 **** } - =item C<R> - - R -> O - - =cut - sub R { my $self = shift; --- 206,209 ---- *************** *** 387,396 **** } - =item C<P> - - P -> OP' - - =cut - sub P { my $self = shift; --- 217,220 ---- *************** *** 405,414 **** } - =item C<P_prime> - - P' -> '&'O | epsilon - - =cut - sub P_prime { my $self = shift; --- 229,232 ---- *************** *** 437,446 **** } - =item C<O> - - O -> CO' - - =cut - sub O { my $self = shift; --- 255,258 ---- *************** *** 455,464 **** } - =item C<O_prime> - - O' -> '|'CO' | epsilon - - =cut - sub O_prime { my $self = shift; --- 267,270 ---- *************** *** 487,496 **** } - =item C<C> - - C -> SC' - - =cut - sub C { my $self = shift; --- 293,296 ---- *************** *** 505,514 **** } - =item C<C_prime> - - C' -> .SC' | epsilon - - =cut - sub C_prime { my $self = shift; --- 305,308 ---- *************** *** 536,545 **** } - =item C<S> - - S -> LS' - - =cut - sub S { my $self = shift; --- 330,333 ---- *************** *** 554,563 **** } - =item C<S_prime> - - S' -> *S' | epsilon - - =cut - sub S_prime { my $self = shift; --- 342,345 ---- *************** *** 575,584 **** } - =item C<L> - - L -> a | b | c |..| 0 | 1 | 2 |..| (R) - - =cut - sub L { my $self = shift; --- 357,360 ---- *************** *** 611,620 **** } - =item C<get_next_pos> - - Returns the next position, used in creating leaf nodes for terminal symbols (minus null string) - - =cut - sub get_next_pos { my $self = shift; --- 387,390 ---- *************** *** 622,631 **** } - =item C<get_curr_pos> - - Returns the current count of terminal symbols (minus null string) - - =cut - sub get_curr_pos { my $self = shift; --- 392,395 ---- *************** *** 633,642 **** } - =item C<set_parse_tree> - - Set parse tree - - =cut - sub set_parse_tree { my $self = shift; --- 397,400 ---- *************** *** 645,654 **** } - =item C<get_parse_tree> - - Return parse tree - - =cut - sub get_parse_tree { my $self = shift; --- 403,406 ---- *************** *** 656,665 **** } - =item C<get_terminals> - - Returns array of terminal symbols - - =cut - sub get_terminals { my $self = shift; --- 408,411 ---- *************** *** 667,676 **** } - =item C<is_terminal> - - Checks to see if given character is a terminal symbol - - =cut - sub is_terminal { my $self = shift; --- 413,416 ---- *************** *** 678,687 **** } - =item C<is_member> - - General subroutine used to test if an element is already in an array - - =cut - sub is_member { my $self = shift; --- 418,421 ---- *************** *** 705,714 **** } - =item C<get_symbols> - - Returns array of all symbols used in current regex - - =cut - sub get_symbols { my $self = shift; --- 439,442 ---- *************** *** 716,725 **** } - =item C<trace_on> - - Turns on tracing - allows to trace the recursive descent parsing - - =cut - sub trace_on { my $self = shift; --- 444,447 ---- *************** *** 728,737 **** } - =item C<trace_off> - - Turns tracing off - - =cut - sub trace_off { my $self = shift; --- 450,453 ---- *************** *** 740,749 **** } - =item C<trace> - - Returns value of _TRACE - - =cut - sub trace { my $self = shift; --- 456,459 ---- *************** *** 751,760 **** } - =item C<toggle_cat_state> - - Toggles cat state instead of cat'ing a '.' to everything - - =cut - sub toggle_cat_state { my $self = shift; --- 461,464 ---- *************** *** 763,772 **** } - =item C<get_cat_state> - - Returns $self->{_CAT_STATE} (1|0) - - =cut - sub get_cat_state { my $self = shift; --- 467,470 ---- *************** *** 774,783 **** } - =item C<set_error> - - Increments error count for regex parsing - - =cut - sub set_error { my $self = shift; --- 472,475 ---- *************** *** 785,794 **** } - =item C<get_error> - - Returns error count - - =cut - sub get_error { my $self = shift; --- 477,480 ---- *************** *** 796,805 **** } - =item C<set_done> - - Sets done flag - - =cut - sub set_done { my $self = shift; --- 482,485 ---- *************** *** 807,816 **** } - =item C<done> - - Returns if done or not - - =cut - sub done { my $self = shift; --- 487,490 ---- *************** *** 818,829 **** } - =item C<DESTROY> - - Called automatically when object is destroyed either explicitly - or automatically when references go to 0 or when the main program - is finished - - =cut - sub DESTROY { return; --- 492,495 ---- *************** *** 832,836 **** 1; ! =back =head1 AUTHOR --- 498,562 ---- 1; ! __END__ ! ! =head1 NAME ! ! PRE - A regular expression base class ! ! =head1 SYNOPSIS ! ! use PRE; ! ! =head1 DESCRIPTION ! ! This module implements a parallel regular expression ! parser, and supports the conversion of a PRE to ! a parallel finite automata. A homegrown recursive ! descent parser is used to build the parse tree, and the method ! used to convert the parallel regular expression to a PFA is a ! modified Thompson's construction that accounts for the additional ! interleave operator (&). ! ! Recursive Descent-safe Regex Grammar: ! ! R -> P ! ! P -> OP' ! ! P -> '&' OP' | epsilon ! ! O -> CO' ! ! O' -> '|' CO' | epsilon ! ! C -> SC' ! ! C' -> .SC' | epsilon ! ! S -> LS' ! ! S' -> *S' | epsilon ! ! L -> a | b | c |..| 0 | 1 | 2 |..| (R) | epsilon ! ! Terminal symbols: a,b,c,..,z,0,1,2,..,9,|,*,(,) ! ! NOTE: Concatenation operator, '.', is not a terminal symbol ! and should not be included in the regex ! ! FAQ: ! Q: Does this support Perl regular expressions? ! A: No, just the regular expression using the terminal symbols ! listed above. ! ! B<Valid terminal characters include:> ! ! I<a b c d e f g h i j k l m n o p q r s t u v w x y z> ! ! I<A B C D E F G H I J K L M N O P Q R S T U V W X Y Z> ! ! I<0 1 2 3 4 5 6 7 8 9 + - = ? [ ] { } . ~ ^ @ % $> ! ! I<: ; < >> =head1 AUTHOR Index: RE.pm =================================================================== RCS file: /cvsroot/perl-flat/flat4cpan/lib/FLAT/FA/RE.pm,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** RE.pm 24 Feb 2006 21:27:15 -0000 1.3 --- RE.pm 27 Feb 2006 17:42:18 -0000 1.4 *************** *** 1,68 **** # $Revision$ $Date$ $Author$ - =head1 NAME - - RE - A regular expression base class - - =head1 SYNOPSIS - - use RE; - use DFA; - my $re = RE->new(); - $re->set_re('a|b|(hi)*'); - my $dfa = $re->to_dfa(); - print $dfa->info(); # see stuff on DFA - - =head1 DESCRIPTION - - This module implements a regular expression - parser, and supports the conversion of a RE to - a deterministic finite automata. A homegrown recursive - descent parser is used to build the parse tree, and the method - used to conver the regular expression to a DFA uses no intermediate - NFA. - - Recursive Descent-safe Regex Grammar: - - R -> O - - O -> CO' - - O' -> '|' CO' | epsilon - - C -> SC' - - C' -> .SC' | epsilon - - S -> LS' - - S' -> *S' | epsilon - - L -> a | b | c |..| 0 | 1 | 2 |..| (R) | epsilon - - Terminal symbols: a,b,c,..,z,0,1,2,..,9,|,*,(,) - - NOTE: Concatenation operator, '.', is not a terminal symbol - and should not be included in the regex - - FAQ: - Q: Does this support Perl regular expressions? - A: No, just the regular expression using the terminal symbols - listed above. - - B<Valid terminal characters include:> - - I<a b c d e f g h i j k l m n o p q r s t u v w x y z> - - I<A B C D E F G H I J K L M N O P Q R S T U V W X Y Z> - - I<0 1 2 3 4 5 6 7 8 9 + - = ? & [ ] { } . ~ ^ @ % $> - - I<: ; < >> - - B<Methods> - - =cut - package FLAT::FA::RE; --- 1,4 ---- *************** *** 75,86 **** use Data::Dumper; - =over 1 - - =item C<new> - - Create a brand spaking new RE object; does not accept regex here - - =cut - sub new { my $class = shift; --- 11,14 ---- *************** *** 108,118 **** } - =item C<set_epsilon> - - Not really for public consumption, but could be in the future. Defines - how epsilon (i.e., the null string) is represented in the parse tree. - - =cut - sub set_epsilon { my $self = shift; --- 36,39 ---- *************** *** 123,132 **** } - =item C<get_epsilon_symbol> - - Returns the string representation of the null string - - =cut - sub get_epsilon_symbol { my $self = shift; --- 44,47 ---- *************** *** 134,144 **** } - - =item C<set_re> - - Defines a regular expression for RE to parse through - - =cut - sub set_re { my $self = shift; --- 49,52 ---- *************** *** 175,185 **** } - - =item C<get_re> - - Returns the regular expression set by C<set_re> as a string - - =cut - sub get_re { my $self = shift; --- 83,86 ---- *************** *** 187,197 **** } - =item C<set_current> - - Meant for private consumption. Initializes the stack used to store - what part of the regex has yet to be parsed - - =cut - sub set_current { my $self = shift; --- 88,91 ---- *************** *** 202,212 **** } - =item C<reset_current> - - Meant for private consumption. Initializes the stack used to store - what part of the regex has yet to be parsed - - =cut - sub reset_current { my $self = shift; --- 96,99 ---- *************** *** 215,224 **** } - =item C<get_current> - - Returns what is in the _CURRENT_STR stack - - =cut - sub get_current { my $self = shift; --- 102,105 ---- *************** *** 226,247 **** } - =item C<minimize> - - Minimizes the regular expression optimally - - =cut - sub minimize { my $self = shift; - return; } - =item C<shrink> - - Shrinks RE using hueristics to create a 'small enough' equivalent expression - - =cut - sub shrink { my $self = shift; --- 107,115 ---- *************** *** 250,261 **** } - =item C<to_nfa> - - Not implemented, but will create an NFA using Thompson's Method; from there - one could do a NFA->to_dfa, and compare the resulting dfa to the one from - RE->to_dfa. - - =cut - sub to_nfa { my $self = shift; --- 118,121 ---- *************** *** 269,278 **** } - =item C<thompson> - - Guts of RE->to_nfa; uses depth first parse tree traversal - - =cut - sub thompson { my $self = shift; --- 129,132 ---- *************** *** 310,322 **** } ! =item C<to_dfa> ! ! Currently BREAKS on a*(b|cd)m!!!!!!!!!!!!!! ! ! Main driver which is used to convert the regular expression to a DFA; calls ! RE->parse() internally if _PARSE_TREE is !defined, so no need to call before this function. ! ! =cut ! sub to_dfa_BROKEN { my $self = shift; --- 164,168 ---- } ! #Currently BREAKS on a*(b|cd)m!!!!!!!!!!!!!! sub to_dfa_BROKEN { my $self = shift; *************** *** 387,397 **** } - - =item C<get_transitions_on> - - Provides a way to get the transitions using the contents of the _FOLLOW_POS table - - =cut - sub get_transition_on { my $self = shift; --- 233,236 ---- *************** *** 405,415 **** } - =item C<move> - - Called by RE->to_re to get the sub set of new states for each sub set of states during - the sub state construction process of building the DFA - - =cut - sub move { my $self = shift; --- 244,247 ---- *************** *** 436,445 **** } - =item C<firstpos> - - Determines firt positions for all nodes in the a parse tree - - =cut - sub firstpos { my $self = shift; --- 268,271 ---- *************** *** 488,497 **** } - =item C<lastpost> - - Determines the last postition for all nodes in the parse tree - - =cut - sub lastpos { my $self = shift; --- 314,317 ---- *************** *** 540,549 **** } - =item C<followpos> - - Determines the first postition for all nodes in the parse tree - - =cut - sub followpos { my $self = shift; --- 360,363 ---- *************** *** 569,578 **** } - =item C<followpos> - - Returns hash containing follow position table - - =cut - sub get_followpos { my $self = shift; --- 383,386 ---- *************** *** 584,593 **** ################################################################ - =item C<parse> - - Parses regular expressin set by RE->set_re, and stores the parse tree in _PARSE_TREE - - =cut - sub parse { my $self = shift; --- 392,395 ---- *************** *** 601,610 **** } - =item C<cat_endmarker> - - Adds '#', or the end of regex marker to the parse tree; not for public consumption - - =cut - sub cat_endmarker { my $self = shift; --- 403,406 ---- *************** *** 613,623 **** } - =item C<match> - - Matches current terminal symbol with terminal character, - and loads up the next lookahead character - - =cut - sub match { my $self = shift; --- 409,412 ---- *************** *** 635,644 **** } - =item C<lookahead> - - Returns value of current lookahead - - =cut - sub lookahead { my $self = shift; --- 424,427 ---- *************** *** 646,655 **** } - =item C<nexttoken> - - Sets next token as lookahead - - =cut - sub nexttoken { my $self = shift; --- 429,432 ---- *************** *** 661,670 **** } - =item C<R> - - R -> O - - =cut - sub R { my $self = shift; --- 438,441 ---- *************** *** 678,687 **** } - =item C<O> - - O -> CO' - - =cut - sub O { my $self = shift; --- 449,452 ---- *************** *** 696,705 **** } - =item C<O_prime> - - O' -> '|'CO' | epsilon - - =cut - sub O_prime { my $self = shift; --- 461,464 ---- *************** *** 728,737 **** } - =item C<C> - - C -> SC' - - =cut - sub C { my $self = shift; --- 487,490 ---- *************** *** 746,755 **** } - =item C<C_prime> - - C' -> .SC' | epsilon - - =cut - sub C_prime { my $self = shift; --- 499,502 ---- *************** *** 777,786 **** } - =item C<S> - - S -> LS' - - =cut - sub S { my $self = shift; --- 524,527 ---- *************** *** 795,804 **** } - =item C<S_prime> - - S' -> *S' | epsilon - - =cut - sub S_prime { my $self = shift; --- 536,539 ---- *************** *** 816,825 **** } - =item C<L> - - L -> a | b | c |..| 0 | 1 | 2 |..| (R) - - =cut - sub L { my $self = shift; --- 551,554 ---- *************** *** 852,861 **** } - =item C<get_next_pos> - - Returns the next position, used in creating leaf nodes for terminal symbols (minus null string) - - =cut - sub get_next_pos { my $self = shift; --- 581,584 ---- *************** *** 863,872 **** } - =item C<get_curr_pos> - - Returns the current count of terminal symbols (minus null string) - - =cut - sub get_curr_pos { my $self = shift; --- 586,589 ---- *************** *** 874,883 **** } - =item C<set_parse_tree> - - Set parse tree - - =cut - sub set_parse_tree { my $self = shift; --- 591,594 ---- *************** *** 886,895 **** } - =item C<get_parse_tree> - - Return parse tree - - =cut - sub get_parse_tree { my $self = shift; --- 597,600 ---- *************** *** 897,906 **** } - =item C<get_terminals> - - Returns array of terminal symbols - - =cut - sub get_terminals { my $self = shift; --- 602,605 ---- *************** *** 908,917 **** } - =item C<is_terminal> - - Checks to see if given character is a terminal symbol - - =cut - sub is_terminal { my $self = shift; --- 607,610 ---- *************** *** 919,928 **** } - =item C<is_member> - - General subroutine used to test if an element is already in an array - - =cut - sub is_member { my $self = shift; --- 612,615 ---- *************** *** 934,955 **** $ret++; } - # foreach (@_) { - # if (defined($_)) { - # if ($test eq $_) { - # $ret++; - # last; - # } - # } - # } } return $ret; } - =item C<get_symbols> - - Returns array of all symbols used in current regex - - =cut - sub get_symbols { my $self = shift; --- 621,628 ---- *************** *** 957,966 **** } - =item C<trace_on> - - Turns on tracing - allows to trace the recursive descent parsing - - =cut - sub trace_on { my $self = shift; --- 630,633 ---- *************** *** 969,978 **** } - =item C<trace_off> - - Turns tracing off - - =cut - sub trace_off { my $self = shift; --- 636,639 ---- *************** *** 981,990 **** } - =item C<trace> - - Returns value of _TRACE - - =cut - sub trace { my $self = shift; --- 642,645 ---- *************** *** 992,1001 **** } - =item C<toggle_cat_state> - - Toggles cat state instead of cat'ing a '.' to everything - - =cut - sub toggle_cat_state { my $self = shift; --- 647,650 ---- *************** *** 1004,1013 **** } - =item C<get_cat_state> - - Returns $self->{_CAT_STATE} (1|0) - - =cut - sub get_cat_state { my $self = shift; --- 653,656 ---- *************** *** 1015,1024 **** } - =item C<set_error> - - Increments error count for regex parsing - - =cut - sub set_error { my $self = shift; --- 658,661 ---- *************** *** 1026,1035 **** } - =item C<get_error> - - Returns error count - - =cut - sub get_error { my $self = shift; --- 663,666 ---- *************** *** 1037,1046 **** } - =item C<set_done> - - Sets done flag - - =cut - sub set_done { my $self = shift; --- 668,671 ---- *************** *** 1048,1057 **** } - =item C<done> - - Returns if done or not - - =cut - sub done { my $self = shift; --- 673,676 ---- *************** *** 1059,1070 **** } - =item C<DESTROY> - - Called automatically when object is destroyed either explicitly - or automatically when references go to 0 or when the main program - is finished - - =cut - sub DESTROY { return; --- 678,681 ---- *************** *** 1073,1077 **** 1; ! =back =head1 AUTHOR --- 684,748 ---- 1; ! __END__ ! ! =head1 NAME ! ! RE - A regular expression base class ! ! =head1 SYNOPSIS ! ! use RE; ! use DFA; ! my $re = RE->new(); ! $re->set_re('a|b|(hi)*'); ! my $dfa = $re->to_dfa(); ! print $dfa->info(); # see stuff on DFA ! ! =head1 DESCRIPTION ! ! This module implements a regular expression ! parser, and supports the conversion of a RE to ! a deterministic finite automata. A homegrown recursive ! descent parser is used to build the parse tree, and the method ! used to conver the regular expression to a DFA uses no intermediate ! NFA. ! ! Recursive Descent-safe Regex Grammar: ! ! R -> O ! ! O -> CO' ! ! O' -> '|' CO' | epsilon ! ! C -> SC' ! ! C' -> .SC' | epsilon ! ! S -> LS' ! ! S' -> *S' | epsilon ! ! L -> a | b | c |..| 0 | 1 | 2 |..| (R) | epsilon ! ! Terminal symbols: a,b,c,..,z,0,1,2,..,9,|,*,(,) ! ! NOTE: Concatenation operator, '.', is not a terminal symbol ! and should not be included in the regex ! ! FAQ: ! Q: Does this support Perl regular expressions? ! A: No, just the regular expression using the terminal symbols ! listed above. ! ! B<Valid terminal characters include:> ! ! I<a b c d e f g h i j k l m n o p q r s t u v w x y z> ! ! I<A B C D E F G H I J K L M N O P Q R S T U V W X Y Z> ! ! I<0 1 2 3 4 5 6 7 8 9 + - = ? & [ ] { } . ~ ^ @ % $> ! ! I<: ; < >> =head1 AUTHOR |