First a brief walkthrough:
- create a stack of context handlers (a handler must react on passed tokens)
- first step is to tokenize the content into a list of tokens
- push the first (global) context onto the context stack
- iterate through the token list and send the token to the last context handler on the stack
( the context handlers are responsible for pushing/poping contexts onto the stack )
- pop off the last context, this should be the first pushed, the global (the trunk of the tree)
Example input: 3 + ( 4 * 2.3 ) - 1
the tokenized list would be: ['3', '+', '(', '4', '*', '2.3', ')', '-', '1']
- push a global scope onto the context handler stack (context 0)
looping through the tokens:
on '3': the context handler adds 3 to its expression list (context 1)
on '+': the context handler adds + to its expression list (context 1)
on '(': the context handler pushes a new context onto the the context handler stack (context 2)
on '4': the context handler adds 4 to its expression list (context 2)
on '*': the context handler adds * to its expression list (context 2)
on '2.3': the context handler adds 2.3 to its expression list (context 2)
on ')': the context handler pops off 'context 2', adds it to the expression list of context 1
on '-': the context handler adds - to the expression list (context 1)
on '1': the context handler adds 1 to the expression list (context 1)
context 1 looks like this: 3 + (context 2) - 1
context 2 looks like this: 4 * 2.3
Now that the data is parsed into the tree structure, some recursive evaluation and we've got the result.
/exprlib/Parser.php
<?php namespace exprlib; /** * this model handles the tokenizing, the context stack functions, and * the parsing (token list to tree trans). * as well as an evaluate method which delegates to the global scopes evaluate. */ class Parser { protected $_content = null; protected $_context_stack = array(); protected $_tree = null; protected $_tokens = array(); public function __construct($content = null) { if ( $content ) { $this->set_content( $content ); } } /** * this function does some simple syntax cleaning: * - removes all spaces * - replaces '**' by '^' * then it runs a regex to split the contents into tokens. the set * of possible tokens in this case is predefined to numbers (ints of floats) * math operators (*, -, +, /, **, ^) and parentheses. */ public function tokenize() { $this->_content = str_replace(array("\n","\r","\t"," "), '', $this->_content); $this->_content = str_replace('**', '^', $this->_content); $this->_content = str_replace('PI', (string)PI(), $this->_content); $this->_tokens = preg_split( '@([\d\.]+)|(sin\(|cos\(|tan\(|sqrt\(|\+|\-|\*|/|\^|\(|\))@', $this->_content, null, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); return $this; } /** * this is the the loop that transforms the tokens array into * a tree structure. */ public function parse() { # this is the global scope which will contain the entire tree $this->push_context( new \exprlib\contexts\Scope() ); foreach ( $this->_tokens as $token ) { # get the last context model from the context stack, # and have it handle the next token $this->get_context()->handle_token( $token ); } $this->_tree = $this->pop_context(); return $this; } public function evaluate() { if ( ! $this->_tree ) { throw new \exprlib\exceptions\ParseTreeNotFoundException(); } return $this->_tree->evaluate(); } /*** accessors and mutators ***/ public function get_tree() { return $this->_tree; } public function set_content($content = null) { $this->_content = $content; return $this; } public function get_tokens() { return $this->_tokens; } /******************************************************* * the context stack functions. for the stack im using * an array with the functions array_push, array_pop, * and end to push, pop, and get the current element * from the stack. *******************************************************/ public function push_context( \exprlib\contexts\IfContext $context ) { array_push( $this->_context_stack, $context ); $this->get_context()->set_builder( $this ); } public function pop_context() { return array_pop( $this->_context_stack ); } public function get_context() { return end( $this->_context_stack ); } }
The global scope in this case is also the parent scope of all other scopes:
/exprlib/contexts/Scope.php
<?php namespace exprlib\contexts; class Scope implements namespace\IfContext { protected $_builder = null; protected $_children_contexts = array(); protected $_raw_content = array(); protected $_operations = array(); const T_NUMBER = 1; const T_OPERATOR = 2; const T_SCOPE_OPEN = 3; const T_SCOPE_CLOSE = 4; const T_SIN_SCOPE_OPEN = 5; const T_COS_SCOPE_OPEN = 6; const T_TAN_SCOPE_OPEN = 7; const T_SQRT_SCOPE_OPEN = 8; public function set_builder( \exprlib\Parser $builder ) { $this->_builder = $builder; } public function __toString() { return implode('', $this->_raw_content); } public function add_operation( $operation ) { $this->_operations[] = $operation; } /** * handle the next token from the tokenized list. example actions * on a token would be to add it to the current context expression list, * to push a new context on the the context stack, or pop a context off the * stack. */ public function handle_token( $token ) { $type = null; if ( in_array( $token, array('*','/','+','-','^') ) ) $type = self::T_OPERATOR; if ( $token === ')' ) $type = self::T_SCOPE_CLOSE; if ( $token === '(' ) $type = self::T_SCOPE_OPEN; if ( $token === 'sin(' ) $type = self::T_SIN_SCOPE_OPEN; if ( $token === 'cos(' ) $type = self::T_COS_SCOPE_OPEN; if ( $token === 'tan(' ) $type = self::T_TAN_SCOPE_OPEN; if ( $token === 'sqrt(' ) $type = self::T_SQRT_SCOPE_OPEN; if ( is_null( $type ) ) { if ( is_numeric( $token ) ) { $type = self::T_NUMBER; $token = (float)$token; } } switch ( $type ) { case self::T_NUMBER: case self::T_OPERATOR: $this->_operations[] = $token; break; case self::T_SCOPE_OPEN: $this->_builder->push_context( new namespace\Scope() ); break; case self::T_SIN_SCOPE_OPEN: $this->_builder->push_context( new namespace\SineScope() ); break; case self::T_COS_SCOPE_OPEN: $this->_builder->push_context( new namespace\CosineScope() ); break; case self::T_TAN_SCOPE_OPEN: $this->_builder->push_context( new namespace\TangentScope() ); break; case self::T_SQRT_SCOPE_OPEN: $this->_builder->push_context( new namespace\SqrtScope() ); break; case self::T_SCOPE_CLOSE: $scope_operation = $this->_builder->pop_context(); $new_context = $this->_builder->get_context(); if ( is_null( $scope_operation ) || ( ! $new_context ) ) { # this means there are more closing parentheses than openning throw new \exprlib\exceptions\OutOfScopeException(); } $new_context->add_operation( $scope_operation ); break; default: throw new \exprlib\exceptions\UnknownTokenException($token); break; } } /** * order of operations: * - parentheses, these should all ready be executed before this method is called * - exponents, first order * - mult/divi, second order * - addi/subt, third order */ protected function _expression_loop( & $operation_list ) { while ( list( $i, $operation ) = each ( $operation_list ) ) { if ( ! in_array( $operation, array('^','*','/','+','-') ) ) continue; $left = isset( $operation_list[ $i - 1 ] ) ? (float)$operation_list[ $i - 1 ] : null; $right = isset( $operation_list[ $i + 1 ] ) ? (float)$operation_list[ $i + 1 ] : null; # if ( is_null( $left ) || is_null( $right ) ) throw new \Exception('syntax error'); $first_order = ( in_array('^', $operation_list) ); $second_order = ( in_array('*', $operation_list ) || in_array('/', $operation_list ) ); $third_order = ( in_array('-', $operation_list ) || in_array('+', $operation_list ) ); $remove_sides = true; if ( $first_order ) { switch( $operation ) { case '^': $operation_list[ $i ] = pow( (float)$left, (float)$right ); break; default: $remove_sides = false; break; } } elseif ( $second_order ) { switch ( $operation ) { case '*': $operation_list[ $i ] = (float)($left * $right); break; case '/': $operation_list[ $i ] = (float)($left / $right); break; default: $remove_sides = false; break; } } elseif ( $third_order ) { switch ( $operation ) { case '+': $operation_list[ $i ] = (float)($left + $right); break; case '-': $operation_list[ $i ] = (float)($left - $right); break; default: $remove_sides = false; break; } } if ( $remove_sides ) { unset( $operation_list[ $i + 1 ], $operation_list[ $i - 1 ] ); reset( $operation_list = array_values( $operation_list ) ); } } if ( count( $operation_list ) === 1 ) return end( $operation_list ); return false; } # order of operations: # - sub scopes first # - multiplication, division # - addition, subtraction # evaluating all the sub scopes (recursivly): public function evaluate() { foreach ( $this->_operations as $i => $operation ) { if ( is_object( $operation ) ) { $this->_operations[ $i ] = $operation->evaluate(); } } $operation_list = $this->_operations; while ( true ) { $operation_check = $operation_list; $result = $this->_expression_loop( $operation_list ); if ( $result !== false ) return $result; if ( $operation_check === $operation_list ) { break; } else { reset( $operation_list = array_values( $operation_list ) ); } } throw new \Exception('failed... here'); } }
Here are 2 of the scopes that extends the global (parent) scope: the sine and squared root scopes
/exprlib/contexts/SineScope.php
<?php namespace exprlib\contexts; class SineScope extends namespace\Scope { public function evaluate() { return sin( deg2rad( parent::evaluate() ) ); } }
\exprlib\contexts\SqrtScope.php
<?php namespace exprlib\contexts; class SqrtScope extends namespace\Scope { public function evaluate() { return sqrt( parent::evaluate() ); } }
To conclude, here is a user interface I built for the testing. It's a command line shell which inputs expressions and outputs the evaluated expressions or thrown exception messages in case of error:
#!/usr/bin/php <?php include('exprlib/loaders.php'); $builder = new \exprlib\Parser(); while ( (fputs(STDOUT,'math > ')) && $e = fgets(STDIN) ) { if ( ! ($e = trim($e)) ) continue; if ( in_array( $e, array('quit','exit',':q') ) ) break; try { $result = $builder->set_content($e)->tokenize()->parse()->evaluate(); } catch ( \exprlib\exceptions\UnknownTokenException $exception ) { echo 'unknown token exception thrown in expression: ', $e, PHP_EOL; echo 'token: "',$exception->getMessage(),'"',PHP_EOL; continue; } catch ( \exprlib\exceptions\ParseTreeNotFoundException $exception ) { echo 'parse tree not found (missing content): ', $e, PHP_EOL; continue; } catch ( \exprlib\exceptions\OutOfScopeException $exception ) { echo 'out of scope exception thrown in: ', $e, PHP_EOL; echo 'you should probably count your parentheses', PHP_EOL; continue; } catch ( \Exception $exception ) { echo 'unknown exception thrown: ', $e, PHP_EOL; echo $exception->getMessage(), PHP_EOL; continue; } echo $result, PHP_EOL; }