1301 lines
43 KiB
PHP
1301 lines
43 KiB
PHP
<?php
|
|
class Lexicon
|
|
{
|
|
/** Per-instance state. */

// When true, __nGramAnalysis() also records its intermediate results under $precise['nGrams-<n>'].
private $debug = false;

// Raw text handed to the constructor.
private $sentence = '';

// Label produced by __inferGeneralContext() during preAnalyse().
private $generalContext = '';

// Analysis result; analyse() fills the 'CONTEXT' and 'CLARITY' keys.
private $precise = array();

// Tokens returned by getWords() for the sentence.
private $words = array();

// Number of tokens; starts as '' and is replaced by an int in preAnalyse().
private $word_count = '';

// Output of __createWordTally().
private $wordTally = array();

// Output of __analyzeSentence(): word => array('CLASSIFICATION' => labels).
private $word_analyzed = array();

/** State shared by every instance; populated by the __generate*() methods. */

// Classification label => list of lower-cased words/phrases (plus a few label lists).
private static $dictionary = array();

// Labels that __createPrecise() keeps when filtering single-word classifications.
private static $gram1highlighters = array();

// Label => list of patterns/phrases consulted by the matching __<n>GramAnalysis().
private static $gram2Highlighters = array();
private static $gram3Highlighters = array();
private static $gram4Highlighters = array();
private static $gram5Highlighters = array();

// Marker written by __initiate() when the shared tables were last rebuilt.
private static $initiatedOn = 0;
|
|
|
|
/**
 * Remembers the text to analyse. No analysis happens here; it is triggered
 * by preAnalyse() / analyse().
 *
 * @param mixed $sentence Text to analyse (stored as given).
 */
function __construct($sentence)
{
    $this->sentence = $sentence;
}
|
|
|
|
/**
 * (Re)builds the shared dictionary and n-gram highlighter tables when more
 * than 30 seconds have passed since the previous rebuild.
 *
 * Fixes over the previous version:
 *  - elapsed time is measured with time() (real seconds). The old code
 *    subtracted YmdHis-formatted integers, which jump by 41 or more across a
 *    minute boundary and therefore do not measure seconds.
 *  - $gram1highlighters is cleared before regenerating, because the
 *    generators append to it and would otherwise add duplicates on every
 *    rebuild. The other tables are assigned by key, so they need no reset.
 */
function __initiate()
{
    $now = time();
    if (($now - self::$initiatedOn) <= 30) {
        return;
    }

    self::$initiatedOn = $now;
    self::$gram1highlighters = array();

    // Call order is kept: the dictionary is iterated in insertion order when
    // words are classified, so the order of these calls is observable.
    $this->__generateMETRICs();
    $this->__generateGENDERs();
    $this->__generateGENDERCLASSIFIERs();
    $this->__generateSPORTCLASSIFIERs();

    $this->__generate2GRAMIDENTIFIERSs();
    $this->__generate3GRAMIDENTIFIERSs();
    $this->__generate4GRAMIDENTIFIERSs();
    $this->__generate5GRAMIDENTIFIERSs();

    $this->__generateNOUNs();
    $this->__generatePRONOUNs();
    $this->__generateVERBs();
    $this->__generateADJECTIVEs();
    $this->__generateADVERBs();
    $this->__generatePREPOSITIONs();
    $this->__generateCONJUNCTIONs();
    $this->__generateINTERJECTIONs();

    $this->__generateHEADERCOLs();
}
|
|
|
|
/**
 * First analysis pass: makes sure the shared tables exist, tokenises the
 * sentence, classifies each token, tallies the result and derives the
 * general context label.
 */
function preAnalyse()
{
    $this->__initiate();

    $text = $this->sentence;

    $this->words         = self::getWords($text);
    $this->word_count    = count($this->words);
    $this->word_analyzed = $this->__analyzeSentence($text);

    $this->wordTally      = $this->__createWordTally($this->word_analyzed);
    $this->generalContext = $this->__inferGeneralContext($this->wordTally);
}
|
|
|
|
/**
 * Full analysis. Runs preAnalyse(), seeds $precise['CONTEXT'] with the
 * highlighted single-word classifications, then merges the 1- to 5-gram
 * findings through __updateContext() and finally removes duplicate context
 * labels.
 */
function analyse()
{
    $this->preAnalyse();

    $this->precise['CONTEXT'] = $this->__createPrecise($this->word_analyzed);
    $this->precise['CLARITY'] = array();

    // Each pass is merged before the next one runs, in increasing n.
    $this->__updateContext($this->__1GramAnalysis($this->word_analyzed));
    $this->__updateContext($this->__2GramAnalysis($this->words));
    $this->__updateContext($this->__3GramAnalysis($this->words));
    $this->__updateContext($this->__4GramAnalysis($this->words));
    $this->__updateContext($this->__5GramAnalysis($this->words));

    $this->precise['CONTEXT'] = array_unique($this->precise['CONTEXT']);
}
|
|
|
|
/**
 * Runs analyse() and splits the sentence tokens into name parts.
 *
 * Rules:
 *  - If the first token is "#", the first two tokens are skipped; when the
 *    second token is numeric, "ROLL-NO" is set to "# <number>".
 *  - If the first remaining token contains a comma it is the last name (with
 *    commas removed) and every following token belongs to the first name.
 *    Otherwise the final token is the last name and everything before it
 *    belongs to the first name.
 *
 * Returned keys: optional 'ROLL-NO', 'LAST-NAME' (null when there are no name
 * tokens), optional 'FIRST-NAME' (each part is prefixed with a space, so the
 * value starts with a space - kept for backward compatibility), and
 * 'FULL-NAME' (trimmed "first last").
 *
 * Compared with the previous version, empty and single-token input no longer
 * reads undefined offsets and nothing is hidden behind "@"; the returned
 * values are unchanged.
 *
 * @return array
 */
function getAnalysisForNames()
{
    $this->analyse();

    $nameAnalysis = array();
    $tokens = $this->words;

    if (isset($tokens[0]) && $tokens[0] === "#") {
        if (isset($tokens[1]) && is_numeric($tokens[1])) {
            $nameAnalysis['ROLL-NO'] = $tokens[0].' '.$tokens[1];
        }
        // Everything after "#" and the token that follows it.
        $nameParts = array_slice($tokens, 2);
    } else {
        $nameParts = $tokens;
    }

    $partCount = count($nameParts);
    $lastName  = null;
    $firstFrom = 0;
    $firstTo   = $partCount;

    if ($partCount > 0 && isset($nameParts[0])) {
        if (strpos($nameParts[0], ",") !== false) {
            // "Last, First ..." layout.
            $lastName  = str_replace(",", "", $nameParts[0]);
            $firstFrom = 1;
        } else {
            // "First ... Last" layout.
            $lastName = isset($nameParts[$partCount - 1]) ? $nameParts[$partCount - 1] : null;
            $firstTo  = $partCount - 1;
        }
    }
    $nameAnalysis['LAST-NAME'] = $lastName;

    $firstName = '';
    for ($i = $firstFrom; $i < $firstTo; $i++) {
        $firstName .= ' '.(isset($nameParts[$i]) ? $nameParts[$i] : '');
    }
    if ($firstName !== '') {
        $nameAnalysis['FIRST-NAME'] = $firstName;
    }

    $nameAnalysis['FULL-NAME'] = trim($firstName.' '.$lastName);

    return $nameAnalysis;
}
|
|
|
|
/**
 * @return string The label computed by preAnalyse(); '' before any analysis ran.
 */
function getGeneralContext()
{
    return $this->generalContext;
}
|
|
|
|
/**
 * @return array The analysis result built by analyse(); an empty array before that.
 */
function getPrecise()
{
    return $this->precise;
}
|
|
|
|
/**
 * Returns the first 2-word or 3-word phrase when it is located at start
 * position 0 of the trimmed sentence. The 3-word phrase is checked last and
 * therefore wins when both qualify. Returns "" when neither qualifies.
 *
 * Requires analyse() to have run (reads $precise['CONTEXT'] / ['CLARITY']).
 * NOTE(review): position lookup is delegated to
 * ParserUtility::get_start_end_pos(); its result is presumably a list of
 * matches with a 'START-POSITION' entry - confirm against that helper.
 *
 * @return string
 */
function getLeadPhrase()
{
    $lead = "";

    foreach (array('2W-PHRASE', '3W-PHRASE') as $tag) {
        if (!in_array($tag, $this->precise['CONTEXT'])) {
            continue;
        }

        $candidate = $this->precise['CLARITY'][$tag][0];
        $span = ParserUtility::get_start_end_pos(trim($this->sentence), $candidate);

        if ($span[0]['START-POSITION'] == 0) {
            $lead = $candidate;
        }
    }

    return $lead;
}
|
|
|
|
/**
 * Returns the phrases of the longest phrase class present in the context
 * (2W, then 3W, 4W, 5W - each later class that is present replaces the
 * result of the earlier ones), skipping phrases while the running counter
 * is still 0.
 *
 * Fix: the counter is now initialised once up front. Previously it was only
 * initialised inside the 2W branch, so the 3W/4W/5W branches read an
 * undefined variable whenever no 2W phrase existed. Results are unchanged
 * (an undefined counter behaved like 0).
 *
 * NOTE(review): the counter is shared across phrase classes, exactly as
 * before. Consequently the first 3W/4W/5W phrase is skipped only when no
 * shorter phrase class was iterated earlier. Confirm whether a per-class
 * reset was intended before changing this.
 *
 * Requires analyse() to have run.
 *
 * @return array List of phrases.
 */
function getOtherPhrases()
{
    $return = array();
    $cnt = 0;

    foreach (array('2W-PHRASE', '3W-PHRASE', '4W-PHRASE', '5W-PHRASE') as $tag) {
        if (!in_array($tag, $this->precise['CONTEXT'])) {
            continue;
        }

        // A longer phrase class supersedes whatever was collected so far.
        $return = array();
        foreach ($this->precise['CLARITY'][$tag] as $phrase) {
            if ($cnt > 0) {
                $return[] = $phrase;
            }
            $cnt++;
        }
    }

    return $return;
}
|
|
|
|
/**
 * Returns the first metric phrase found by analyse(), or "" when the context
 * holds no 'METRIC-PHRASE'.
 *
 * @return string
 */
function getMetricPhrase()
{
    if (!in_array('METRIC-PHRASE', $this->precise['CONTEXT'])) {
        return "";
    }

    return $this->precise['CLARITY']['METRIC-PHRASE'][0];
}
|
|
|
|
/**
 * Returns 'MALE' or 'FEMALE' depending on which label is present in the
 * analysed context ('MALE' is checked first), or "" when neither is.
 *
 * @return string
 */
function getGender()
{
    foreach (array('MALE', 'FEMALE') as $label) {
        if (in_array($label, $this->precise['CONTEXT'])) {
            return $label;
        }
    }

    return "";
}
|
|
|
|
/**
 * Returns the first matched text of the sports classifier found in the
 * context. When several classifiers are present the last one in context
 * order wins. Returns "" when there is none.
 *
 * @return string
 */
function getSports()
{
    $sport = "";

    foreach ($this->precise['CONTEXT'] as $context) {
        if (!in_array($context, self::$dictionary['SPORTS_CLASSIFIER'])) {
            continue;
        }
        $sport = $this->precise['CLARITY'][$context][0];
    }

    return $sport;
}
|
|
|
|
/**
 * Returns the sports classifier label present in the context (the last one
 * in context order when several are present), or "" when there is none.
 *
 * @return string
 */
function getSportsType()
{
    $type = "";

    foreach ($this->precise['CONTEXT'] as $context) {
        if (!in_array($context, self::$dictionary['SPORTS_CLASSIFIER'])) {
            continue;
        }
        $type = $context;
    }

    return $type;
}
|
|
|
|
/**
 * Returns the round label (one of the 'SPORTS-ROUND' entries) present in the
 * context; the last one in context order wins. Returns "" when the context
 * is missing, is not an array, or contains no round label.
 *
 * @return string
 */
function getSportsRound()
{
    $round = "";

    if (!isset($this->precise['CONTEXT']) || !is_array($this->precise['CONTEXT'])) {
        return $round;
    }

    foreach ($this->precise['CONTEXT'] as $context) {
        if (in_array($context, self::$dictionary['SPORTS-ROUND'])) {
            $round = $context;
        }
    }

    return $round;
}
|
|
|
|
/**
 * Returns the level label (one of the 'SPORTS-LEVEL' entries) present in the
 * context; the last one in context order wins. Returns "" when the context
 * is missing, is not an array, or contains no level label.
 *
 * @return string
 */
function getSportsLevel()
{
    $level = "";

    if (!isset($this->precise['CONTEXT']) || !is_array($this->precise['CONTEXT'])) {
        return $level;
    }

    foreach ($this->precise['CONTEXT'] as $context) {
        if (in_array($context, self::$dictionary['SPORTS-LEVEL'])) {
            $level = $context;
        }
    }

    return $level;
}
|
|
|
|
/**
 * @return array The shared classification dictionary (empty until an
 *               instance has run __initiate()).
 */
static function getDictionary()
{
    return self::$dictionary;
}
|
|
|
|
/**
 * Merges one n-gram analysis into $precise.
 *
 * Every key of $analysis is appended to $precise['CONTEXT'] and its value is
 * stored under $precise['CLARITY'][key]. When the key is a sports
 * classifier, all sports classifiers already present are removed first (from
 * both CONTEXT and CLARITY), so only the newest classifier survives.
 *
 * @param array $analysis label => list of matched texts
 */
function __updateContext($analysis)
{
    if (sizeof($analysis) <= 0) {
        return;
    }

    foreach ($analysis as $tag => $matches) {
        if (in_array($tag, self::$dictionary['SPORTS_CLASSIFIER'])) {
            // Drop every sports classifier recorded by an earlier pass.
            foreach ($this->precise['CONTEXT'] as $slot => $existing) {
                if (!in_array($existing, self::$dictionary['SPORTS_CLASSIFIER'])) {
                    continue;
                }
                unset($this->precise['CONTEXT'][$slot]);
                unset($this->precise['CLARITY'][$existing]);
            }
        }

        $this->precise['CONTEXT'][] = $tag;
        $this->precise['CLARITY'][$tag] = $matches;
    }
}
|
|
|
|
/**
 * Classifies every word of $sentence.
 *
 * A word receives the label of every dictionary entry that lists its
 * lower-cased form. A word found in no entry becomes:
 *  - 'NUMBER' when it is numeric,
 *  - 'NUMBER' when it contains ":" (checked first) or "-" and every piece
 *    around that separator is numeric,
 *  - 'NOUN' otherwise.
 *
 * The result is keyed by the word itself, so a word that occurs twice has
 * its labels appended twice to the same entry.
 *
 * @param string $sentence
 * @return array word => array('CLASSIFICATION' => list of labels)
 */
function __analyzeSentence($sentence)
{
    $result = array();

    foreach (self::getWords($sentence) as $token) {
        $needle  = strtolower($token);
        $matched = false;

        foreach (self::$dictionary as $label => $entries) {
            if (in_array($needle, $entries)) {
                $matched = true;
                $result[$token]['CLASSIFICATION'][] = $label;
            }
        }

        if ($matched) {
            continue;
        }

        $label = 'NOUN';
        if (is_numeric($token)) {
            $label = 'NUMBER';
        } else {
            // Only one separator is considered; ":" takes precedence over "-".
            $separator = null;
            if (strpos($token, ":") !== false) {
                $separator = ":";
            } elseif (strpos($token, "-") !== false) {
                $separator = "-";
            }

            if ($separator !== null) {
                $label = 'NUMBER';
                foreach (explode($separator, $token) as $piece) {
                    if (is_numeric($piece) === false) {
                        $label = 'NOUN';
                        break;
                    }
                }
            }
        }

        $result[$token]['CLASSIFICATION'][] = $label;
    }

    return $result;
}
|
|
|
|
/**
 * Flattens the classification labels of all analysed words into one list,
 * in word order.
 *
 * @param array $word_analyzed Output of __analyzeSentence().
 * @param bool  $analyze       When true (default) only labels listed in the
 *                             1-gram highlighters are kept; when false every
 *                             label is returned.
 * @return array List of labels.
 */
function __createPrecise($word_analyzed,$analyze=true)
{
    $labels = array();
    foreach ($word_analyzed as $entry) {
        foreach ($entry['CLASSIFICATION'] as $label) {
            $labels[] = $label;
        }
    }

    if (!$analyze) {
        return $labels;
    }

    $highlighted = array();
    foreach ($labels as $label) {
        if (in_array($label, self::$gram1highlighters)) {
            $highlighted[] = $label;
        }
    }

    return $highlighted;
}
|
|
|
|
/**
 * Builds occurrence statistics for analysed words.
 *
 * Result layout:
 *  - 'WORDCOUNT': number of entries in $word_analyzed
 *  - 'WORD'[word]     => COUNT, POSITION (list of entry indexes), PERCENT
 *  - 'WORDTYPE'[type] => COUNT, POSITION, PERCENT, where type is the FIRST
 *    classification label of each word
 * PERCENT is COUNT / WORDCOUNT * 100, rounded to 3 decimals. The 'WORD' and
 * 'WORDTYPE' keys are absent when $word_analyzed is empty.
 *
 * @param array $word_analyzed Output of __analyzeSentence().
 * @return array
 */
function __createWordTally($word_analyzed)
{
    $total = count($word_analyzed);
    $tally = array("WORDCOUNT" => $total);

    $position = 0;
    foreach ($word_analyzed as $word => $info) {
        $wordHits = isset($tally["WORD"][$word]['COUNT']) ? $tally["WORD"][$word]['COUNT'] + 1 : 1;
        $tally["WORD"][$word]['COUNT'] = $wordHits;
        $tally["WORD"][$word]['POSITION'][] = $position;
        $tally["WORD"][$word]['PERCENT'] = round(($wordHits / $total) * 100, 3);

        $type = $info['CLASSIFICATION'][0];

        $typeHits = isset($tally["WORDTYPE"][$type]['COUNT']) ? $tally["WORDTYPE"][$type]['COUNT'] + 1 : 1;
        $tally["WORDTYPE"][$type]['COUNT'] = $typeHits;
        $tally["WORDTYPE"][$type]['POSITION'][] = $position;
        $tally["WORDTYPE"][$type]['PERCENT'] = round(($typeHits / $total) * 100, 3);

        $position++;
    }

    return $tally;
}
|
|
|
|
/**
 * Derives a coarse label for the sentence from its word tally.
 *
 * Decision order:
 *  1. no words                                     -> "LINE-BREAK"
 *  2. one word, longer than 5 bytes, made of one
 *     repeated byte (e.g. "======")                -> "DECORATOR"
 *  3. one word that is a round label               -> "SPORTS-ROUND"
 *  4. several words, >= 50% header columns         -> "HEADER"
 *  5. >= 50% nouns                                 -> "SENTENCE"
 *  6. anything else                                -> "INFO"
 *
 * Fix: the single word is cast to string before its characters are
 * inspected. PHP turns numeric words into integer array keys, and indexing
 * an integer yields null, so every digit used to land in the same bucket and
 * any lone number of six or more digits (e.g. "123456") was reported as
 * "DECORATOR".
 *
 * @param array $wordTally Output of __createWordTally().
 * @return string
 */
function __inferGeneralContext($wordTally)
{
    $wordCount = $wordTally['WORDCOUNT'];

    if ($wordCount == 0) {
        return "LINE-BREAK";
    }

    if ($wordCount == 1) {
        foreach ($wordTally["WORD"] as $_wrd => $restOfIt) {
            $text = (string) $_wrd;
            // count_chars(..., 3) returns a string holding each distinct byte once.
            if (strlen($text) > 5 && strlen(count_chars($text, 3)) == 1) {
                return "DECORATOR";
            }
        }
    }

    $types = isset($wordTally['WORDTYPE']) ? $wordTally['WORDTYPE'] : array();

    $nounPercent   = isset($types['NOUN']['PERCENT']) ? $types['NOUN']['PERCENT'] : 0;
    $headerPercent = isset($types['HEADER-COL']['PERCENT']) ? $types['HEADER-COL']['PERCENT'] : 0;

    // Highest share held by any single round label.
    $roundPercent = 0;
    foreach (self::$dictionary['SPORTS-ROUND'] as $roundLabel) {
        $share = isset($types[$roundLabel]['PERCENT']) ? $types[$roundLabel]['PERCENT'] : 0;
        if ($roundPercent < $share) {
            $roundPercent = $share;
        }
    }

    if ($wordCount == 1 && $roundPercent == 100) {
        return "SPORTS-ROUND";
    }
    if ($wordCount > 1 && $headerPercent >= 50) {
        return "HEADER";
    }
    if ($nounPercent >= 50) {
        return "SENTENCE";
    }

    return "INFO";
}
|
|
|
|
/**
 * Single-word pass. Collects
 *  - gender words, filed under the word's SECOND classification label
 *    (this relies on the order in which dictionary entries were registered),
 *  - words whose FIRST classification label is a sports classifier, filed
 *    under that label.
 *
 * @param array $word_analyzed Output of __analyzeSentence().
 * @return array label => list of words
 */
function __1GramAnalysis($word_analyzed)
{
    $found = array();

    foreach ($word_analyzed as $token => $info) {
        $labels = $info['CLASSIFICATION'];

        if (in_array('GENDER', $labels)) {
            $found[$labels[1]][] = $token;
        }

        $primary = $labels[0];
        if (in_array($primary, self::$dictionary['SPORTS_CLASSIFIER'])) {
            $found[$primary][] = $token;
        }
    }

    return $found;
}
|
|
|
|
/**
 * Two-word pass. For every 2-gram of the sentence:
 *  - if its lower-cased text is listed in a dictionary entry, the original
 *    text is filed under each such entry's label;
 *  - otherwise it is filed under every 2-gram highlighter whose list holds
 *    either the gram's classification pattern (labels joined by spaces) or
 *    its lower-cased text (once per match, so possibly twice).
 *
 * NOTE(review): the $words argument is not used; the grams are always built
 * from $this->words.
 *
 * @param array $words Unused, see note.
 * @return array label => list of matched phrases
 */
function __2GramAnalysis($words)
{
    $matches = array();
    $grams = $this->__nGramAnalysis(2, $this->words);

    if (!isset($grams['PRECIS']) || !is_array($grams['PRECIS'])) {
        return $matches;
    }

    foreach ($grams['PRECIS'] as $gram) {
        $pattern = implode(" ", $gram['DET']);
        $phrase  = strtolower($gram['JWORDS']);

        $inDictionary = false;
        foreach (self::$dictionary as $label => $entries) {
            if (in_array($phrase, $entries)) {
                $inDictionary = true;
                $matches[$label][] = $gram['JWORDS'];
            }
        }
        if ($inDictionary) {
            continue;
        }

        foreach (self::$gram2Highlighters as $label => $patterns) {
            if (@in_array($pattern, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
            if (@in_array($phrase, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
        }
    }

    return $matches;
}
|
|
|
|
/**
 * Three-word pass. For every 3-gram of the sentence:
 *  - if its lower-cased text is listed in a dictionary entry, the original
 *    text is filed under each such entry's label;
 *  - otherwise it is filed under every 3-gram highlighter whose list holds
 *    either the gram's classification pattern (labels joined by spaces) or
 *    its lower-cased text (once per match, so possibly twice).
 *
 * NOTE(review): the $words argument is not used; the grams are always built
 * from $this->words.
 *
 * @param array $words Unused, see note.
 * @return array label => list of matched phrases
 */
function __3GramAnalysis($words)
{
    $matches = array();
    $grams = $this->__nGramAnalysis(3, $this->words);

    if (!isset($grams['PRECIS']) || !is_array($grams['PRECIS'])) {
        return $matches;
    }

    foreach ($grams['PRECIS'] as $gram) {
        $pattern = implode(" ", $gram['DET']);
        $phrase  = strtolower($gram['JWORDS']);

        $inDictionary = false;
        foreach (self::$dictionary as $label => $entries) {
            if (in_array($phrase, $entries)) {
                $inDictionary = true;
                $matches[$label][] = $gram['JWORDS'];
            }
        }
        if ($inDictionary) {
            continue;
        }

        foreach (self::$gram3Highlighters as $label => $patterns) {
            if (@in_array($pattern, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
            if (@in_array($phrase, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
        }
    }

    return $matches;
}
|
|
|
|
/**
 * Four-word pass. For every 4-gram of the sentence:
 *  - if its lower-cased text is listed in a dictionary entry, the original
 *    text is filed under each such entry's label;
 *  - otherwise it is filed under every 4-gram highlighter whose list holds
 *    either the gram's classification pattern (labels joined by spaces) or
 *    its lower-cased text (once per match, so possibly twice).
 *
 * NOTE(review): the $words argument is not used; the grams are always built
 * from $this->words.
 *
 * @param array $words Unused, see note.
 * @return array label => list of matched phrases
 */
function __4GramAnalysis($words)
{
    $matches = array();
    $grams = $this->__nGramAnalysis(4, $this->words);

    if (!isset($grams['PRECIS']) || !is_array($grams['PRECIS'])) {
        return $matches;
    }

    foreach ($grams['PRECIS'] as $gram) {
        $pattern = implode(" ", $gram['DET']);
        $phrase  = strtolower($gram['JWORDS']);

        $inDictionary = false;
        foreach (self::$dictionary as $label => $entries) {
            if (in_array($phrase, $entries)) {
                $inDictionary = true;
                $matches[$label][] = $gram['JWORDS'];
            }
        }
        if ($inDictionary) {
            continue;
        }

        foreach (self::$gram4Highlighters as $label => $patterns) {
            if (@in_array($pattern, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
            if (@in_array($phrase, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
        }
    }

    return $matches;
}
|
|
|
|
/**
 * Five-word pass. For every 5-gram of the sentence:
 *  - if its lower-cased text is listed in a dictionary entry, the original
 *    text is filed under each such entry's label;
 *  - otherwise it is filed under every 5-gram highlighter whose list holds
 *    either the gram's classification pattern (labels joined by spaces) or
 *    its lower-cased text (once per match, so possibly twice).
 *
 * NOTE(review): the $words argument is not used; the grams are always built
 * from $this->words.
 *
 * @param array $words Unused, see note.
 * @return array label => list of matched phrases
 */
function __5GramAnalysis($words)
{
    $matches = array();
    $grams = $this->__nGramAnalysis(5, $this->words);

    if (!isset($grams['PRECIS']) || !is_array($grams['PRECIS'])) {
        return $matches;
    }

    foreach ($grams['PRECIS'] as $gram) {
        $pattern = implode(" ", $gram['DET']);
        $phrase  = strtolower($gram['JWORDS']);

        $inDictionary = false;
        foreach (self::$dictionary as $label => $entries) {
            if (in_array($phrase, $entries)) {
                $inDictionary = true;
                $matches[$label][] = $gram['JWORDS'];
            }
        }
        if ($inDictionary) {
            continue;
        }

        foreach (self::$gram5Highlighters as $label => $patterns) {
            if (@in_array($pattern, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
            if (@in_array($phrase, $patterns)) {
                $matches[$label][] = $gram['JWORDS'];
            }
        }
    }

    return $matches;
}
|
|
|
|
/**
 * Builds every run of $n consecutive words (joined by single spaces) and
 * classifies each run.
 *
 * Returns array('PRECIS' => list of array('JWORDS' => joined text,
 * 'DET' => unfiltered classification labels of the run)); the 'PRECIS' key
 * is absent when fewer than $n words are available. With $debug enabled the
 * intermediate data is also stored under $precise['nGrams-<n>'].
 *
 * @param int   $n     Words per gram.
 * @param array $words Word list; consecutive integer keys are assumed by the
 *                     offset arithmetic.
 * @return array
 */
function __nGramAnalysis($n,$words)
{
    $debugKey = 'nGrams-'.$n;
    $total    = count($words);

    $phrases = array();
    foreach ($words as $offset => $first) {
        if (($offset + $n) > $total) {
            // Not enough words left for a complete gram.
            break;
        }

        $phrase = $first;
        for ($step = 1; $step < $n; $step++) {
            $phrase .= ' '.$words[$offset + $step];
        }

        $phrases[] = $phrase;
        if ($this->debug) {
            $this->precise[$debugKey]['SENTENCE'][] = $phrase;
        }
    }

    $result = array();
    foreach ($phrases as $slot => $phrase) {
        $classified = $this->__analyzeSentence($phrase);
        if ($this->debug) {
            $this->precise[$debugKey]['ANALYSED'][] = $classified;
        }

        $labels = $this->__createPrecise($classified, false);
        $result['PRECIS'][$slot]['JWORDS'] = $phrase;
        $result['PRECIS'][$slot]['DET'] = $labels;

        if ($this->debug) {
            $this->precise[$debugKey]['PRECIS'][$slot]['JWORDS'] = $phrase;
            $this->precise[$debugKey]['PRECIS'][$slot]['DET'] = $labels;
        }
    }

    return $result;
}
|
|
|
|
/**
 * Registers the distance-unit words under the 'METRIC' dictionary label.
 */
function __generateMETRICs()
{
    self::$dictionary['METRIC'] = array_map('strtolower', array('Meter', 'Mile'));
}
|
|
|
|
/**
 * Registers every gender word (both genders together) under the 'GENDER'
 * dictionary label.
 */
function __generateGENDERs()
{
    $terms = array(
        'Boy', 'Boys', 'Girl', 'Girls',
        'Man', 'Men', 'Woman', 'Women',
        'Male', 'Female',
    );

    self::$dictionary['GENDER'] = array_map('strtolower', $terms);
}
|
|
|
|
/**
 * Registers the 'MALE' and 'FEMALE' dictionary entries and marks both labels
 * as 1-gram highlighters.
 */
function __generateGENDERCLASSIFIERs()
{
    $byGender = array(
        array('MALE',   array('Boy', 'Boys', 'Man', 'Men', 'Male')),
        array('FEMALE', array('Girl', 'Girls', 'Woman', 'Women', 'Female')),
    );

    foreach ($byGender as $pair) {
        self::$dictionary[$pair[0]] = array_map('strtolower', $pair[1]);
        self::$gram1highlighters[] = $pair[0];
    }
}
|
|
|
|
/**
 * Registers the sports vocabulary in three groups - event families, rounds
 * and competition levels.
 *
 * For every label the lower-cased phrases are stored in the dictionary and
 * the label is added to the 1-gram highlighters. After each group, the list
 * of that group's labels is stored under 'SPORTS_CLASSIFIER', 'SPORTS-ROUND'
 * and 'SPORTS-LEVEL' respectively. Registration order is preserved because
 * the dictionary is iterated in insertion order when words are classified.
 *
 * Fixes over the previous version:
 *  - the League level was registered in the dictionary but missing from the
 *    'SPORTS-LEVEL' list (the only level left out); the round and level
 *    lists are now derived from the registered labels so they cannot drift.
 *  - the correctly spelled singular "Preliminary" is accepted next to the
 *    existing spellings of the preliminary round.
 */
function __generateSPORTCLASSIFIERs()
{
    $sports = array(
        array(ParserUtility::$_RUNNING_, array(
            'Dash', 'Marathon', 'Race Walking', 'Hurdles', 'Run',
            'Steeplechase', 'Race', 'Race Walk',
        )),
        array(ParserUtility::$_RELAY_, array(
            'Relay', 'Sprint Medley', 'Distance Medley', 'Shuttle',
        )),
        array(ParserUtility::$_THROWING_, array(
            'Discus', 'Hammer', 'Hammer Throw', 'Javelin', 'Shot Put', 'Weight Throw',
        )),
        array(ParserUtility::$_JUMPING_, array(
            'High Jump', 'Long Jump', 'Pole Vault', 'Triple Jump',
        )),
        array(ParserUtility::$_MIXED_EVENT_, array(
            'Decathlon', 'Heptathlon', 'Pentathlon',
        )),
        // Team events have no single-phrase vocabulary here; they are
        // recognised through the 5-gram highlighters.
        array(ParserUtility::$_JUMPING_TEAM_, array()),
        array(ParserUtility::$_THROWING_TEAM_, array()),
    );

    // The published classifier list uses its own historical order.
    $sportsClassifiers = array(
        ParserUtility::$_RUNNING_,
        ParserUtility::$_JUMPING_,
        ParserUtility::$_THROWING_,
        ParserUtility::$_RELAY_,
        ParserUtility::$_JUMPING_TEAM_,
        ParserUtility::$_THROWING_TEAM_,
        ParserUtility::$_MIXED_EVENT_,
    );

    $rounds = array(
        array(ParserUtility::$_ROUND_FINAL_, array('Final', 'Finals')),
        array(ParserUtility::$_ROUND_SEMI_FINAL_, array(
            'Semi', 'Semis', 'Semi-Final', 'Semi-Finals', 'SemiFinal', 'SemiFinals',
        )),
        array(ParserUtility::$_ROUND_PRILIMINARY_, array(
            'Priliminary', 'Priliminaries', 'Preliminaries', 'Prilim', 'Prilims',
            'Preliminary',
        )),
        array(ParserUtility::$_ROUND_TRIAL_, array('Trials', 'Trial')),
    );

    $levels = array(
        array(ParserUtility::$_LEVEL_OPEN_, array('Open')),
        array(ParserUtility::$_LEVEL_FROS_SOPH_, array(
            'FROSH / SOPH', 'FROSH/ SOPH', 'FROSH /SOPH', 'FROSH/SOPH', 'Freshman/Sophomore',
        )),
        array(ParserUtility::$_LEVEL_FRESHMAN_, array(
            'Freshman', 'Freshwoman', 'Freshmen', 'Freshwomen', 'FROSH',
        )),
        array(ParserUtility::$_LEVEL_SOPHOMORE_, array('Sophomore', 'SOPH')),
        array(ParserUtility::$_LEVEL_JUNIOR_, array('Junior')),
        array(ParserUtility::$_LEVEL_SENIOR_, array('Senior')),
        array(ParserUtility::$_LEVEL_UNIVARSITY_, array('Varsity', 'Individual Varsity')),
        array(ParserUtility::$_LEVEL_CONFERENCE_, array('Conference')),
        array(ParserUtility::$_LEVEL_LEAGUE_, array('League')),
        array(ParserUtility::$_LEVEL_JV_, array(
            'Junior Varsity', 'Junior Varsi', 'Jr. Varsity', 'JV',
        )),
        array(ParserUtility::$_LEVEL_SUB_MIDGET_, array('Sub Midget', 'Sub-Midget')),
        array(ParserUtility::$_LEVEL_MIDGET_, array('Midget')),
        array(ParserUtility::$_LEVEL_JUNIOR_WEIGH_, array(
            'Junior Weigh', 'Jr. Weigh', 'Jr Weigh',
        )),
        array(ParserUtility::$_LEVEL_NOVICE_, array('Novice')),
        array(ParserUtility::$_LEVEL_CLASS_, array('Class')),
        array(ParserUtility::$_LEVEL_DIVISION_, array('Division')),
        array(ParserUtility::$_LEVEL_CROSSOVER_, array('Crossover')),
        array(ParserUtility::$_LEVEL_INDIV_TEAM_, array('Indiv. Team', 'Individual Team')),
        array(ParserUtility::$_LEVEL_JR_SR_, array(
            'Junior-Senior', 'Junior- Senior', 'Junior -Senior', 'Junior - Senior',
            'Jr.-Sr.', 'Jr.- Sr.', 'Jr. -Sr.', 'Jr. - Sr.',
        )),
        array(ParserUtility::$_LEVEL_UNSEEDED_, array('Unseeded')),
        array(ParserUtility::$_LEVEL_CHAMPIONSHIP_, array('Championship')),
        array(ParserUtility::$_LEVEL_ELITE_, array('Elite', 'Elit')),
        array(ParserUtility::$_LEVEL_SMALL_SCHOOL_, array('Small Schools', 'Small School')),
        array(ParserUtility::$_LEVEL_LARGE_SCHOOL_, array('Large Schools', 'Large School')),
        array(ParserUtility::$_LEVEL_INVITATIONAL_, array('Invitational')),
        array(ParserUtility::$_LEVEL_PSAL_, array('PSAL')),
    );

    // Each group: [dictionary key of the label list, entries, explicit list or null].
    $groups = array(
        array('SPORTS_CLASSIFIER', $sports, $sportsClassifiers),
        array('SPORTS-ROUND', $rounds, null),
        array('SPORTS-LEVEL', $levels, null),
    );

    foreach ($groups as $group) {
        $listKey  = $group[0];
        $entries  = $group[1];
        $explicit = $group[2];

        $labels = array();
        foreach ($entries as $entry) {
            $label = $entry[0];
            self::$dictionary[$label] = array_map('strtolower', $entry[1]);
            self::$gram1highlighters[] = $label;
            $labels[] = $label;
        }

        // The label list is stored right after its group, as before.
        self::$dictionary[$listKey] = ($explicit === null) ? $labels : $explicit;
    }
}
|
|
|
|
/**
 * Registers the column-heading words under the 'HEADER-COL' dictionary label.
 */
function __generateHEADERCOLs()
{
    $columns = array(
        'Name', 'Year', 'Seed', 'School', 'Finals', 'Prelims',
        'H#', 'Comp#', 'Points', 'Time', 'Team',
    );

    self::$dictionary['HEADER-COL'] = array_map('strtolower', $columns);
}
|
|
|
|
/**
 * Registers the 2-gram highlighters: two classification patterns lists
 * ('2W-PHRASE', 'METRIC-PHRASE') and one literal phrase list ('RELAY').
 *
 * NOTE(review): the relay entry uses the literal key 'RELAY' while the
 * 4-gram table uses ParserUtility::$_RELAY_ - confirm both resolve to the
 * same label.
 */
function __generate2GRAMIDENTIFIERSs()
{
    self::$gram2Highlighters['2W-PHRASE'] = array_map(
        'strtoupper',
        array('NOUN NOUN', 'NOUN NUMBER')
    );

    self::$gram2Highlighters['METRIC-PHRASE'] = array_map(
        'strtoupper',
        array('NUMBER METRIC', 'NOUN METRIC')
    );

    self::$gram2Highlighters['RELAY'] = array(strtolower('Sprint Medley'));
}
|
|
|
|
/**
 * Registers the classification patterns that make a 3-gram a '3W-PHRASE'.
 */
function __generate3GRAMIDENTIFIERSs()
{
    self::$gram3Highlighters['3W-PHRASE'] = array_map(
        'strtoupper',
        array('NOUN NOUN NOUN', 'NOUN NOUN NUMBER', 'NUMBER NOUN NOUN')
    );
}
|
|
|
|
/**
 * Registers the 4-gram highlighters: a '4W-PHRASE' pattern list that holds
 * only an empty placeholder pattern, and one literal relay phrase.
 */
function __generate4GRAMIDENTIFIERSs()
{
    self::$gram4Highlighters['4W-PHRASE'] = array(strtoupper(''));

    self::$gram4Highlighters[ParserUtility::$_RELAY_] = array(
        strtolower('Hurdles 2 Person Rel'),
    );
}
|
|
|
|
/**
 * Fills self::$gram5Highlighters with the five-word entries.
 *
 * Three keys are written, in this order:
 *  - '5W-PHRASE'                      : a single empty-string pattern (placeholder)
 *  - ParserUtility::$_JUMPING_TEAM_   : lower-cased literal phrases
 *  - ParserUtility::$_THROWING_TEAM_  : lower-cased literal phrases
 *
 * The generic pattern key used to be '4W-PHRASE', copied from the
 * four-word generator; it now follows the 2W-/3W-/4W-PHRASE naming of the
 * sibling generators.
 */
function __generate5GRAMIDENTIFIERSs()
{
    // No generic five-word pattern is defined; the list holds only ''.
    self::$gram5Highlighters['5W-PHRASE'] = array(strtoupper(''));

    $jumpingPhrases = array(
        'Long Jump 2 Person Rel',
        'Triple Jump 2 Person Rel',
        'High Jump 2 Person Rel',
        'Pole Vault 2 Person Rel',
    );
    self::$gram5Highlighters[ParserUtility::$_JUMPING_TEAM_] = array_map('strtolower', $jumpingPhrases);

    // NOTE(review): 'Discus Throw 2 Person Re' is kept exactly as written.
    // It has the same length as the longest jumping phrase, so it may mirror
    // input that is cut off at a fixed width - confirm before "correcting" it.
    $throwingPhrases = array(
        'Shot Put 2 Person Rel',
        'Discus Throw 2 Person Re',
    );
    self::$gram5Highlighters[ParserUtility::$_THROWING_TEAM_] = array_map('strtolower', $throwingPhrases);
}
|
|
|
|
/**
 * Registers the noun list under self::$dictionary['NOUN'].
 *
 * The list is a placeholder: its only entry is the empty string.
 */
function __generateNOUNs()
{
    self::$dictionary['NOUN'] = array(strtolower(''));
}
|
|
|
|
/**
 * Registers the pronoun list under self::$dictionary['PRONOUN'].
 *
 * The list is a placeholder: its only entry is the empty string.
 */
function __generatePRONOUNs()
{
    self::$dictionary['PRONOUN'] = array(strtolower(''));
}
|
|
|
|
/**
 * Registers the verb list under self::$dictionary['VERB'].
 *
 * The list is a placeholder: its only entry is the empty string.
 */
function __generateVERBs()
{
    self::$dictionary['VERB'] = array(strtolower(''));
}
|
|
|
|
/**
 * Registers the adjective list under self::$dictionary['ADJECTIVE'].
 *
 * The list is a placeholder: its only entry is the empty string.
 */
function __generateADJECTIVEs()
{
    self::$dictionary['ADJECTIVE'] = array(strtolower(''));
}
|
|
|
|
/**
 * Registers the adverb list under self::$dictionary['ADVERB'].
 *
 * The list is a placeholder: its only entry is the empty string.
 */
function __generateADVERBs()
{
    self::$dictionary['ADVERB'] = array(strtolower(''));
}
|
|
|
|
/**
 * Registers the preposition list under self::$dictionary['PREPOSITION'].
 *
 * Entries are lower-cased and kept in the order listed below.
 */
function __generatePREPOSITIONs()
{
    $prepositionList = array(
        'of',
        'with',
        'at',
        'from',
        'into',
        'during',
        'including',
        'until',
        'against',
        'among',
        'throughout',
        'despite',
        'towards',
        'upon',
        'concerning',
        'to',
        'in',
        'for',
        'on',
        'by',
        'about',
        'like',
        'through',
        'over',
        'before',
        'between',
        'after',
        'since',
        'without',
        'under',
        'within',
        'along',
        'following',
        'across',
        'behind',
        'beyond',
        'plus',
        'except',
        'but',
        'up',
        'out',
        'around',
        'down',
        'off',
        'above',
        'near',
    );

    // Dictionary entries are stored lower-cased.
    self::$dictionary['PREPOSITION'] = array_map('strtolower', $prepositionList);
}
|
|
|
|
/**
 * Registers the conjunction list under self::$dictionary['CONJUNCTION'].
 *
 * The list is a placeholder: its only entry is the empty string.
 */
function __generateCONJUNCTIONs()
{
    self::$dictionary['CONJUNCTION'] = array(strtolower(''));
}
|
|
|
|
/**
 * Registers the interjection list under self::$dictionary['INTERJECTION'].
 *
 * The list is a placeholder: its only entry is the empty string.
 */
function __generateINTERJECTIONs()
{
    self::$dictionary['INTERJECTION'] = array(strtolower(''));
}
|
|
|
|
/**
 * Splits a sentence into words on single space characters.
 *
 * Every piece is trimmed of surrounding whitespace. Punctuation stays
 * attached to the word it came with, and whitespace other than the space
 * character (tabs, newlines) does not separate words.
 *
 * @param string|null $sentence Text to split; null is treated as ''.
 * @param bool        $trim     When true (default), pieces that are empty
 *                              after trimming are dropped; when false they
 *                              are kept as '' entries.
 * @return array Trimmed words in their original order, indexed from 0.
 */
static function getWords($sentence, $trim=true)
{
    // explode() rejects null on newer PHP versions; an absent sentence
    // simply has no words.
    if ($sentence === null) {
        $sentence = '';
    }

    $words = array();

    foreach (explode(' ', $sentence) as $piece) {
        $piece = trim($piece);

        // Consecutive, leading or trailing spaces produce empty pieces.
        if ($trim && $piece === '') {
            continue;
        }

        $words[] = $piece;
    }

    return $words;
}
|
|
}
|
|
?>
|