First a brief walkthrough:
- create a stack of context handlers (a handler must react on passed tokens)
- first step is to tokenize the content into a list of tokens
- push the first (global) context onto the context stack
- iterate through the token list and send the token to the last context handler on the stack
( the context handlers are responsible for pushing/poping contexts onto the stack )
- pop off the last context, this should be the first pushed, the global (the trunk of the tree)
Example input: 3 + ( 4 * 2.3 ) - 1
the tokenized list would be: ['3', '+', '(', '4', '*', '2.3', ')', '-', '1']
- push a global scope onto the context handler stack (context 0)
looping through the tokens:
on '3': the context handler adds 3 to its expression list (context 1)
on '+': the context handler adds + to its expression list (context 1)
on '(': the context handler pushes a new context onto the the context handler stack (context 2)
on '4': the context handler adds 4 to its expression list (context 2)
on '*': the context handler adds * to its expression list (context 2)
on '2.3': the context handler adds 2.3 to its expression list (context 2)
on ')': the context handler pops off 'context 2', adds it to the expression list of context 1
on '-': the context handler adds - to the expression list (context 1)
on '1': the context handler adds 1 to the expression list (context 1)
context 1 looks like this: 3 + (context 2) - 1
context 2 looks like this: 4 * 2.3
Now that the data is parsed into the tree structure, some recursive evaluation and we've got the result.
/exprlib/Parser.php
<?php
namespace exprlib;
/**
* this model handles the tokenizing, the context stack functions, and
* the parsing (token list to tree trans).
* as well as an evaluate method which delegates to the global scopes evaluate.
*/
class Parser {
protected $_content = null;
protected $_context_stack = array();
protected $_tree = null;
protected $_tokens = array();
public function __construct($content = null) {
if ( $content ) {
$this->set_content( $content );
}
}
/**
* this function does some simple syntax cleaning:
* - removes all spaces
* - replaces '**' by '^'
* then it runs a regex to split the contents into tokens. the set
* of possible tokens in this case is predefined to numbers (ints of floats)
* math operators (*, -, +, /, **, ^) and parentheses.
*/
public function tokenize() {
$this->_content = str_replace(array("\n","\r","\t"," "), '', $this->_content);
$this->_content = str_replace('**', '^', $this->_content);
$this->_content = str_replace('PI', (string)PI(), $this->_content);
$this->_tokens = preg_split(
'@([\d\.]+)|(sin\(|cos\(|tan\(|sqrt\(|\+|\-|\*|/|\^|\(|\))@',
$this->_content,
null,
PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
);
return $this;
}
/**
* this is the the loop that transforms the tokens array into
* a tree structure.
*/
public function parse() {
# this is the global scope which will contain the entire tree
$this->push_context( new \exprlib\contexts\Scope() );
foreach ( $this->_tokens as $token ) {
# get the last context model from the context stack,
# and have it handle the next token
$this->get_context()->handle_token( $token );
}
$this->_tree = $this->pop_context();
return $this;
}
public function evaluate() {
if ( ! $this->_tree ) {
throw new \exprlib\exceptions\ParseTreeNotFoundException();
}
return $this->_tree->evaluate();
}
/*** accessors and mutators ***/
public function get_tree() {
return $this->_tree;
}
public function set_content($content = null) {
$this->_content = $content;
return $this;
}
public function get_tokens() {
return $this->_tokens;
}
/*******************************************************
* the context stack functions. for the stack im using
* an array with the functions array_push, array_pop,
* and end to push, pop, and get the current element
* from the stack.
*******************************************************/
public function push_context( \exprlib\contexts\IfContext $context ) {
array_push( $this->_context_stack, $context );
$this->get_context()->set_builder( $this );
}
public function pop_context() {
return array_pop( $this->_context_stack );
}
public function get_context() {
return end( $this->_context_stack );
}
}The global scope in this case is also the parent scope of all other scopes:
/exprlib/contexts/Scope.php
<?php
namespace exprlib\contexts;
class Scope implements namespace\IfContext {
protected $_builder = null;
protected $_children_contexts = array();
protected $_raw_content = array();
protected $_operations = array();
const T_NUMBER = 1;
const T_OPERATOR = 2;
const T_SCOPE_OPEN = 3;
const T_SCOPE_CLOSE = 4;
const T_SIN_SCOPE_OPEN = 5;
const T_COS_SCOPE_OPEN = 6;
const T_TAN_SCOPE_OPEN = 7;
const T_SQRT_SCOPE_OPEN = 8;
public function set_builder( \exprlib\Parser $builder ) {
$this->_builder = $builder;
}
public function __toString() {
return implode('', $this->_raw_content);
}
public function add_operation( $operation ) {
$this->_operations[] = $operation;
}
/**
* handle the next token from the tokenized list. example actions
* on a token would be to add it to the current context expression list,
* to push a new context on the the context stack, or pop a context off the
* stack.
*/
public function handle_token( $token ) {
$type = null;
if ( in_array( $token, array('*','/','+','-','^') ) ) $type = self::T_OPERATOR;
if ( $token === ')' ) $type = self::T_SCOPE_CLOSE;
if ( $token === '(' ) $type = self::T_SCOPE_OPEN;
if ( $token === 'sin(' ) $type = self::T_SIN_SCOPE_OPEN;
if ( $token === 'cos(' ) $type = self::T_COS_SCOPE_OPEN;
if ( $token === 'tan(' ) $type = self::T_TAN_SCOPE_OPEN;
if ( $token === 'sqrt(' ) $type = self::T_SQRT_SCOPE_OPEN;
if ( is_null( $type ) ) {
if ( is_numeric( $token ) ) {
$type = self::T_NUMBER;
$token = (float)$token;
}
}
switch ( $type ) {
case self::T_NUMBER:
case self::T_OPERATOR:
$this->_operations[] = $token;
break;
case self::T_SCOPE_OPEN:
$this->_builder->push_context( new namespace\Scope() );
break;
case self::T_SIN_SCOPE_OPEN:
$this->_builder->push_context( new namespace\SineScope() );
break;
case self::T_COS_SCOPE_OPEN:
$this->_builder->push_context( new namespace\CosineScope() );
break;
case self::T_TAN_SCOPE_OPEN:
$this->_builder->push_context( new namespace\TangentScope() );
break;
case self::T_SQRT_SCOPE_OPEN:
$this->_builder->push_context( new namespace\SqrtScope() );
break;
case self::T_SCOPE_CLOSE:
$scope_operation = $this->_builder->pop_context();
$new_context = $this->_builder->get_context();
if ( is_null( $scope_operation ) || ( ! $new_context ) ) {
# this means there are more closing parentheses than openning
throw new \exprlib\exceptions\OutOfScopeException();
}
$new_context->add_operation( $scope_operation );
break;
default:
throw new \exprlib\exceptions\UnknownTokenException($token);
break;
}
}
/**
* order of operations:
* - parentheses, these should all ready be executed before this method is called
* - exponents, first order
* - mult/divi, second order
* - addi/subt, third order
*/
protected function _expression_loop( & $operation_list ) {
while ( list( $i, $operation ) = each ( $operation_list ) ) {
if ( ! in_array( $operation, array('^','*','/','+','-') ) ) continue;
$left = isset( $operation_list[ $i - 1 ] ) ? (float)$operation_list[ $i - 1 ] : null;
$right = isset( $operation_list[ $i + 1 ] ) ? (float)$operation_list[ $i + 1 ] : null;
# if ( is_null( $left ) || is_null( $right ) ) throw new \Exception('syntax error');
$first_order = ( in_array('^', $operation_list) );
$second_order = ( in_array('*', $operation_list ) || in_array('/', $operation_list ) );
$third_order = ( in_array('-', $operation_list ) || in_array('+', $operation_list ) );
$remove_sides = true;
if ( $first_order ) {
switch( $operation ) {
case '^': $operation_list[ $i ] = pow( (float)$left, (float)$right ); break;
default: $remove_sides = false; break;
}
} elseif ( $second_order ) {
switch ( $operation ) {
case '*': $operation_list[ $i ] = (float)($left * $right); break;
case '/': $operation_list[ $i ] = (float)($left / $right); break;
default: $remove_sides = false; break;
}
} elseif ( $third_order ) {
switch ( $operation ) {
case '+': $operation_list[ $i ] = (float)($left + $right); break;
case '-': $operation_list[ $i ] = (float)($left - $right); break;
default: $remove_sides = false; break;
}
}
if ( $remove_sides ) {
unset( $operation_list[ $i + 1 ], $operation_list[ $i - 1 ] );
reset( $operation_list = array_values( $operation_list ) );
}
}
if ( count( $operation_list ) === 1 ) return end( $operation_list );
return false;
}
# order of operations:
# - sub scopes first
# - multiplication, division
# - addition, subtraction
# evaluating all the sub scopes (recursivly):
public function evaluate() {
foreach ( $this->_operations as $i => $operation ) {
if ( is_object( $operation ) ) {
$this->_operations[ $i ] = $operation->evaluate();
}
}
$operation_list = $this->_operations;
while ( true ) {
$operation_check = $operation_list;
$result = $this->_expression_loop( $operation_list );
if ( $result !== false ) return $result;
if ( $operation_check === $operation_list ) {
break;
} else {
reset( $operation_list = array_values( $operation_list ) );
}
}
throw new \Exception('failed... here');
}
}Here are 2 of the scopes that extends the global (parent) scope: the sine and squared root scopes
/exprlib/contexts/SineScope.php
<?php
namespace exprlib\contexts;
class SineScope extends namespace\Scope {
public function evaluate() {
return sin( deg2rad( parent::evaluate() ) );
}
}\exprlib\contexts\SqrtScope.php
<?php
namespace exprlib\contexts;
class SqrtScope extends namespace\Scope {
public function evaluate() {
return sqrt( parent::evaluate() );
}
}To conclude, here is a user interface I built for the testing. It's a command line shell which inputs expressions and outputs the evaluated expressions or thrown exception messages in case of error:
#!/usr/bin/php
<?php
include('exprlib/loaders.php');
$builder = new \exprlib\Parser();
while ( (fputs(STDOUT,'math > ')) && $e = fgets(STDIN) ) {
if ( ! ($e = trim($e)) ) continue;
if ( in_array( $e, array('quit','exit',':q') ) ) break;
try {
$result = $builder->set_content($e)->tokenize()->parse()->evaluate();
} catch ( \exprlib\exceptions\UnknownTokenException $exception ) {
echo 'unknown token exception thrown in expression: ', $e, PHP_EOL;
echo 'token: "',$exception->getMessage(),'"',PHP_EOL;
continue;
} catch ( \exprlib\exceptions\ParseTreeNotFoundException $exception ) {
echo 'parse tree not found (missing content): ', $e, PHP_EOL;
continue;
} catch ( \exprlib\exceptions\OutOfScopeException $exception ) {
echo 'out of scope exception thrown in: ', $e, PHP_EOL;
echo 'you should probably count your parentheses', PHP_EOL;
continue;
} catch ( \Exception $exception ) {
echo 'unknown exception thrown: ', $e, PHP_EOL;
echo $exception->getMessage(), PHP_EOL;
continue;
}
echo $result, PHP_EOL;
}