Le code suivant se compose des classes Tokenizer, Token et QueryBuilder. Ce n'est probablement pas la solution la plus élégante qui soit, mais elle fait bien ce que vous demandiez :
<?php
// Demo script: turns a human-readable search string into a SQL WHERE fragment
// and prints it (or the parse error) as HTML.
//
// QueryBuilder grammar, as implemented by the parser (EBNF:
// [ x ] = optional, { x } = zero or more repetitions):
// ====================================================
// SearchRule       := [ SimpleSearchRule ] { KeyWord }
// SimpleSearchRule := Expression { 'OR' Expression }
// Expression       := SimpleExpression { 'AND' SimpleExpression }
// SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression
// FieldExpression  := name ':' value
$input = 'from:me AND (to:john OR to:jenny) dinner party';

// Search-field name (as typed by the user) => database column name.
$fieldMapping = array(
    'id' => 'id',
    'from' => 'from',
    'to' => 'to',
    'title' => 'title',
    'description' => 'description',
    'time_created' => 'time_created'
);

// Columns searched by the trailing free-text keywords (MATCH ... AGAINST).
$fullTextFields = array('title', 'description');

$qb = new QueryBuilder($fieldMapping, $fullTextFields);
try {
    // The generated SQL contains user-supplied values, so escape it for the
    // HTML context it is printed into.
    echo htmlspecialchars($qb->parseSearchRule($input), ENT_QUOTES, 'UTF-8');
} catch (Exception $error) {
    // Parser messages embed the offending token text, which comes straight
    // from the user: escape it before it reaches the page.
    echo 'Error occurred while parsing search query: <br/>'
        . htmlspecialchars($error->getMessage(), ENT_QUOTES, 'UTF-8');
}
/**
 * A single lexical unit produced by the Tokenizer.
 *
 * Plain value holder: the token kind (one of the class constants), the raw
 * characters it was built from and the offset at which it starts in the input.
 */
class Token {
    // Token kinds.
    const KEYWORD = 'KEYWORD';
    const OPEN_PAR = 'OPEN_PAR';
    const CLOSE_PAR = 'CLOSE_PAR';
    const FIELD = 'FIELD';
    const AND_OP = 'AND_OP';
    const OR_OP = 'OR_OP';

    /** @var string One of the token-kind constants above. */
    public $type;

    /** @var string Raw text of the token. */
    public $chars;

    /** @var int Offset of the token's first character in the tokenized input. */
    public $position;

    /**
     * @param string $type     Token kind.
     * @param string $chars    Raw text of the token.
     * @param int    $position Start offset of the token in the input.
     */
    public function __construct($type, $chars, $position) {
        $this->position = $position;
        $this->chars = $chars;
        $this->type = $type;
    }

    /**
     * Human-readable representation, intended for debugging.
     *
     * @return string
     */
    public function __toString() {
        return sprintf(
            'Token[ type=%s, chars=%s, position=%s ]',
            $this->type,
            $this->chars,
            $this->position
        );
    }
}
/**
 * Splits a search string into Token objects, one at a time, with a single
 * token of look-ahead.
 *
 * Recognised tokens: '(' and ')', field expressions ("name:value"), the
 * operators AND / OR, and plain keywords. The characters ':', '(', ')' and
 * whitespace are reserved and never part of a name, value or keyword.
 */
class Tokenizer {
    /** @var string Input being tokenized (surrounding whitespace removed). */
    private $input;

    /** @var int Byte offset in $input of the next unread character. */
    private $currentPosition;

    /** @var Token|null Token already read from the input but not yet consumed. */
    private $lookahead = null;

    /**
     * @param string $input Raw search string.
     */
    public function __construct($input) {
        $this->input = trim($input);
        $this->currentPosition = 0;
    }

    /**
     * Peeks at the next token without consuming it.
     *
     * @return Token|null The next token, or null once the input is exhausted.
     * @throws Exception  If the remaining input cannot be tokenized.
     */
    public function getToken() {
        if ($this->lookahead === null) {
            $this->lookahead = $this->nextToken();
        }
        return $this->lookahead;
    }

    /**
     * Returns the next token and advances past it.
     *
     * @return Token|null The consumed token, or null once the input is exhausted.
     * @throws Exception  If the remaining input cannot be tokenized.
     */
    public function consumeToken() {
        $token = $this->getToken();
        $this->lookahead = null;
        return $token;
    }

    /**
     * Reads one token starting at the current position and moves the position
     * past it.
     *
     * @return Token|null The token read, or null at end of input.
     * @throws Exception  If no token pattern matches the remaining input
     *                    (e.g. a stray ':').
     */
    protected function nextToken() {
        // substr() returns false instead of '' at end of input on PHP < 8.
        $remaining = (string) substr($this->input, $this->currentPosition);

        // Skip leading whitespace.
        if (preg_match('/^\s+/', $remaining, $matches)) {
            $this->currentPosition += strlen($matches[0]);
            $remaining = (string) substr($this->input, $this->currentPosition);
        }
        if ($remaining === '') {
            return null;
        }

        $start = $this->currentPosition;

        // Parentheses are single-character tokens.
        if ($remaining[0] === '(') {
            $this->currentPosition++;
            return new Token(Token::OPEN_PAR, '(', $start);
        }
        if ($remaining[0] === ')') {
            $this->currentPosition++;
            return new Token(Token::CLOSE_PAR, ')', $start);
        }

        // Tried in order: a field expression wins over an operator ("AND:x" is
        // a field), and an operator wins over a keyword. Operators must be
        // followed by whitespace or directly by '(' so that "a:b AND(c:d)" is
        // accepted while words such as "ANDROID" remain keywords. The
        // look-ahead keeps the following character out of the match.
        $patterns = array(
            Token::FIELD   => '/^[^:\s()]+:[^:\s()]+/',
            Token::AND_OP  => '/^AND(?=[\s(])/',
            Token::OR_OP   => '/^OR(?=[\s(])/',
            Token::KEYWORD => '/^[^:\s()]+/',
        );
        foreach ($patterns as $type => $pattern) {
            if (preg_match($pattern, $remaining, $matches)) {
                $this->currentPosition += strlen($matches[0]);
                return new Token($type, $matches[0], $start);
            }
        }

        throw new Exception('Unable to tokenize: '.$remaining);
    }
}
/**
 * Translates a search string such as
 * "from:me AND (to:john OR to:jenny) dinner party" into a SQL WHERE fragment.
 *
 * Field expressions ("name:value") become `column` = 'value' comparisons,
 * combined with AND / OR and parentheses; trailing keywords become a
 * MATCH ... AGAINST (... IN BOOLEAN MODE) clause over the full-text columns.
 */
class QueryBuilder {
    /** @var array Search-field name => column name. */
    private $fieldMapping;

    /** @var array Column names used for the full-text MATCH clause. */
    private $fulltextFields;

    /** @var callable|null Escapes a value for use inside a single-quoted SQL string. */
    private $escaper;

    /**
     * @param array         $fieldMapping   Search-field name => column name.
     * @param array         $fulltextFields Columns searched by free-text keywords.
     * @param callable|null $escaper        Optional callable taking a raw string and
     *                                      returning it escaped for a single-quoted
     *                                      SQL string literal (without the quotes),
     *                                      e.g. array($mysqli, 'real_escape_string').
     */
    public function __construct($fieldMapping, $fulltextFields, $escaper = null) {
        $this->fieldMapping = $fieldMapping;
        $this->fulltextFields = $fulltextFields;
        $this->escaper = $escaper;
    }

    /**
     * Parses a complete search string.
     *
     * @param string $input Raw search string.
     * @return string SQL condition, or '' when the input contains no tokens.
     * @throws Exception On syntax errors or unknown field names.
     */
    public function parseSearchRule($input) {
        $t = new Tokenizer($input);
        $token = $t->getToken();
        if ($token === null) {
            return '';
        }

        // The structured part is optional: input may consist of keywords only.
        $searchRule = '';
        if ($token->type !== Token::KEYWORD) {
            $searchRule = $this->parseSimpleSearchRule($t);
        }

        // Everything after the structured part must be plain keywords.
        $keywords = array();
        while (($token = $t->consumeToken()) !== null) {
            if ($token->type !== Token::KEYWORD) {
                throw new Exception('Only keywords allowed at end of search rule.');
            }
            $keywords[] = $token->chars;
        }
        if (count($keywords) === 0) {
            return $searchRule;
        }

        $matchClause = 'MATCH (`'.implode('`,`', $this->fulltextFields).'`) AGAINST ('
            .'\''.$this->escape(implode(' ', $keywords)).'\' IN BOOLEAN MODE)';
        if ($searchRule === '') {
            return $matchClause;
        }
        return '('.$searchRule.') AND ('.$matchClause.')';
    }

    /**
     * SimpleSearchRule := Expression { 'OR' Expression }
     *
     * @param Tokenizer $t
     * @return string
     * @throws Exception
     */
    protected function parseSimpleSearchRule(Tokenizer $t) {
        $expressions = array();
        do {
            $expressions[] = $this->parseExpression($t);
        } while ($this->acceptToken($t, Token::OR_OP));
        return implode(' OR ', $expressions);
    }

    /**
     * Expression := SimpleExpression { 'AND' SimpleExpression }
     *
     * @param Tokenizer $t
     * @return string
     * @throws Exception
     */
    protected function parseExpression(Tokenizer $t) {
        $expressions = array();
        do {
            $expressions[] = $this->parseSimpleExpression($t);
        } while ($this->acceptToken($t, Token::AND_OP));
        return implode(' AND ', $expressions);
    }

    /**
     * SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression
     *
     * @param Tokenizer $t
     * @return string
     * @throws Exception On unexpected tokens, unexpected end of input or
     *                   field names missing from the field mapping.
     */
    protected function parseSimpleExpression(Tokenizer $t) {
        $token = $t->consumeToken();
        if ($token === null) {
            // e.g. "from:me AND" or "(" with nothing after it.
            throw new Exception('Expected opening parenthesis or field-expression, found: end of input');
        }

        if ($token->type === Token::OPEN_PAR) {
            $spr = $this->parseSimpleSearchRule($t);
            $closing = $t->consumeToken();
            if ($closing === null) {
                throw new Exception('Expected closing parenthesis, found: end of input');
            }
            if ($closing->type !== Token::CLOSE_PAR) {
                throw new Exception('Expected closing parenthesis, found: '.$closing->chars);
            }
            return '('.$spr.')';
        }

        if ($token->type === Token::FIELD) {
            // A FIELD token always has the form "name:value".
            $fieldVal = explode(':', $token->chars, 2);
            if (isset($this->fieldMapping[$fieldVal[0]])) {
                return '`'.$this->fieldMapping[$fieldVal[0]].'` = \''.$this->escape($fieldVal[1]).'\'';
            }
            throw new Exception('Unknown field selected: '.$token->chars);
        }

        throw new Exception('Expected opening parenthesis or field-expression, found: '.$token->chars);
    }

    /**
     * Consumes the next token if (and only if) it has the given type.
     *
     * @param Tokenizer $t
     * @param string    $type One of the Token kind constants.
     * @return bool True when a token was consumed; false otherwise, including
     *              at end of input.
     */
    private function acceptToken(Tokenizer $t, $type) {
        $token = $t->getToken();
        if ($token === null || $token->type !== $type) {
            return false;
        }
        $t->consumeToken();
        return true;
    }

    /**
     * Escapes a value for inclusion in a single-quoted SQL string literal.
     *
     * Uses the escaper passed to the constructor when there is one. Otherwise
     * falls back to mysql_real_escape_string() where the legacy extension still
     * exists (it was removed in PHP 7.0), and finally to a built-in replacement
     * that escapes the same characters.
     *
     * NOTE: the built-in fallback is not aware of the connection character set.
     * Pass a connection-bound escaper to the constructor in production code.
     *
     * @param string $value
     * @return string
     */
    private function escape($value) {
        if ($this->escaper !== null) {
            return call_user_func($this->escaper, $value);
        }
        if (function_exists('mysql_real_escape_string')) {
            return mysql_real_escape_string($value);
        }
        return strtr($value, array(
            "\\"   => "\\\\",
            "\0"   => "\\0",
            "\n"   => "\\n",
            "\r"   => "\\r",
            "'"    => "\\'",
            '"'    => '\\"',
            "\x1a" => "\\Z",
        ));
    }
}
?>
Une solution plus appropriée consisterait à construire d'abord un arbre syntaxique, puis, après une analyse plus approfondie, à le transformer en requête.