kris@sentientgeeks.com 01ed2f0b15 initial Commit
2021-02-09 10:26:46 +05:30

1301 lines
43 KiB
PHP

<?php
class Lexicon
{
private $debug = false;
private $sentence = '';
private $generalContext = '';
private $precise = array();
private $words = array();
private $word_count = '';
private $wordTally = array();
private $word_analyzed = array();
private static $dictionary = array();
private static $gram1highlighters = array();
private static $gram2Highlighters = array();
private static $gram3Highlighters = array();
private static $gram4Highlighters = array();
private static $gram5Highlighters = array();
private static $initiatedOn = 0;
/**
 * Creates an analyser for one piece of text.
 *
 * The text is only stored here; no tokenising or classification happens
 * until preAnalyse() / analyse() is called.
 *
 * @param string $sentence Raw text to analyse.
 */
function __construct($sentence)
{
$this->sentence = $sentence;
}
/**
 * (Re)builds the static dictionary and n-gram highlighter tables shared by
 * all Lexicon instances.
 *
 * The tables are rebuilt only when more than 30 seconds have elapsed since
 * the previous build. The elapsed time is measured with time(); the former
 * date("YmdHis") integer is not a linear clock (the difference jumps at every
 * minute/hour/day rollover) and does not fit in a 32-bit integer.
 *
 * @return void
 */
function __initiate()
{
    $now = time();
    if (($now - self::$initiatedOn) <= 30) {
        return;
    }
    self::$initiatedOn = $now;

    // The generators append to this list, so it must be emptied first;
    // otherwise every rebuild would add another copy of each label.
    self::$gram1highlighters = array();

    // Order matters: it fixes the key order of the static dictionary, which
    // in turn fixes the order of the CLASSIFICATION labels of each word.
    $builders = array(
        '__generateMETRICs',
        '__generateGENDERs',
        '__generateGENDERCLASSIFIERs',
        '__generateSPORTCLASSIFIERs',
        '__generate2GRAMIDENTIFIERSs',
        '__generate3GRAMIDENTIFIERSs',
        '__generate4GRAMIDENTIFIERSs',
        '__generate5GRAMIDENTIFIERSs',
        '__generateNOUNs',
        '__generatePRONOUNs',
        '__generateVERBs',
        '__generateADJECTIVEs',
        '__generateADVERBs',
        '__generatePREPOSITIONs',
        '__generateCONJUNCTIONs',
        '__generateINTERJECTIONs',
        '__generateHEADERCOLs',
    );
    foreach ($builders as $builder) {
        $this->$builder();
    }
}
/**
 * First analysis pass: makes sure the shared tables exist, tokenises the
 * sentence, classifies every token, tallies the classifications and derives
 * the general context of the line.
 *
 * @return void
 */
function preAnalyse()
{
    $this->__initiate();

    $tokens = self::getWords($this->sentence);
    $this->words = $tokens;
    $this->word_count = count($tokens);

    $classified = $this->__analyzeSentence($this->sentence);
    $this->word_analyzed = $classified;

    $tally = $this->__createWordTally($classified);
    $this->wordTally = $tally;
    $this->generalContext = $this->__inferGeneralContext($tally);
}
/**
 * Runs the complete analysis and fills $this->precise.
 *
 * 'CONTEXT' starts as the highlighted single-word labels and is then extended
 * by the 1-gram to 5-gram analyses, in that order; 'CLARITY' maps each label
 * added by those analyses to the phrases that produced it. Duplicate labels
 * are removed from 'CONTEXT' at the end (array_unique keeps the original keys).
 *
 * @return void
 */
function analyse()
{
    $this->preAnalyse();

    $this->precise['CONTEXT'] = $this->__createPrecise($this->word_analyzed);
    $this->precise['CLARITY'] = array();

    $this->__updateContext($this->__1GramAnalysis($this->word_analyzed));

    $phraseAnalysers = array('__2GramAnalysis', '__3GramAnalysis', '__4GramAnalysis', '__5GramAnalysis');
    foreach ($phraseAnalysers as $analyser) {
        $this->__updateContext($this->$analyser($this->words));
    }

    $this->precise['CONTEXT'] = array_unique($this->precise['CONTEXT']);
}
/**
 * Runs the analysis and splits the sentence into name components.
 *
 * A leading "#" token marks a roll number: "# 12 ..." yields ROLL-NO "# 12".
 * The token that follows "#" is never treated as part of the name, whether or
 * not it is numeric (unchanged from the previous behaviour).
 * When the first name token contains a comma it is used as the last name
 * ("Smith, John"); otherwise the final token is the last name.
 *
 * Empty input, or input consisting only of the roll-number tokens, no longer
 * reads undefined offsets; it yields empty LAST-NAME and FULL-NAME strings.
 *
 * @return array Keys: 'ROLL-NO' (optional), 'LAST-NAME', 'FIRST-NAME'
 *               (optional, still prefixed with one space) and 'FULL-NAME'.
 */
function getAnalysisForNames()
{
    $this->analyse();

    $nameAnalysis = array();
    $words = $this->words;

    if (isset($words[0]) && $words[0] === "#") {
        if (isset($words[1]) && is_numeric($words[1])) {
            $nameAnalysis['ROLL-NO'] = $words[0].' '.$words[1];
        }
        $nameParts = array_slice($words, 2);
    } else {
        $nameParts = $words;
    }

    // Nothing left to interpret as a name.
    if (count($nameParts) === 0) {
        $nameAnalysis['LAST-NAME'] = '';
        $nameAnalysis['FULL-NAME'] = '';
        return $nameAnalysis;
    }

    if (strpos($nameParts[0], ",") !== false) {
        // "Last, First ..." form.
        $nameAnalysis['LAST-NAME'] = str_replace(",", "", $nameParts[0]);
        $givenParts = array_slice($nameParts, 1);
    } else {
        // "First ... Last" form.
        $nameAnalysis['LAST-NAME'] = $nameParts[count($nameParts) - 1];
        $givenParts = array_slice($nameParts, 0, -1);
    }

    $firstName = '';
    if (count($givenParts) > 0) {
        // The leading space is kept so existing callers see the same value.
        $firstName = ' '.implode(' ', $givenParts);
        $nameAnalysis['FIRST-NAME'] = $firstName;
    }

    $nameAnalysis['FULL-NAME'] = trim($firstName.' '.$nameAnalysis['LAST-NAME']);
    return $nameAnalysis;
}
/**
 * Returns the general context label computed by preAnalyse() via
 * __inferGeneralContext(); an empty string until then.
 */
function getGeneralContext()
{
return $this->generalContext;
}
/**
 * Returns the analysis result array. After analyse() it holds the keys
 * 'CONTEXT' and 'CLARITY'; before that it is an empty array.
 */
function getPrecise()
{
return $this->precise;
}
/**
 * Returns the first detected 2-word or 3-word phrase if it sits at the very
 * start of the trimmed sentence. The 3-word phrase is checked last, so it
 * wins when both qualify. Returns "" when neither qualifies.
 *
 * Requires analyse() to have run. The start position is taken from
 * ParserUtility::get_start_end_pos().
 *
 * @return string
 */
function getLeadPhrase()
{
    $lead = "";
    foreach (array('2W-PHRASE', '3W-PHRASE') as $label) {
        if (!in_array($label, $this->precise['CONTEXT'])) {
            continue;
        }
        $candidate = $this->precise['CLARITY'][$label][0];
        $positions = ParserUtility::get_start_end_pos(trim($this->sentence), $candidate);
        if ($positions[0]['START-POSITION'] == 0) {
            $lead = $candidate;
        }
    }
    return $lead;
}
/**
 * Returns the detected phrases other than the first one, taken from the
 * longest phrase length (2 to 5 words) present in the context.
 *
 * The first phrase of the chosen length is always skipped. Previously the
 * skip counter was shared between the lengths and only initialised for
 * 2-word phrases, so the first 3/4/5-word phrase was skipped or kept
 * depending on whether shorter phrases happened to exist.
 *
 * @return array List of phrases; empty when analyse() has not run or no
 *               further phrases exist.
 */
function getOtherPhrases()
{
    $others = array();
    if (!isset($this->precise['CONTEXT']) || !is_array($this->precise['CONTEXT'])) {
        return $others;
    }

    // Longer phrase lengths replace the result of shorter ones.
    foreach (array('2W-PHRASE', '3W-PHRASE', '4W-PHRASE', '5W-PHRASE') as $label) {
        if (!in_array($label, $this->precise['CONTEXT'])) {
            continue;
        }
        $phrases = array();
        if (isset($this->precise['CLARITY'][$label]) && is_array($this->precise['CLARITY'][$label])) {
            $phrases = array_values($this->precise['CLARITY'][$label]);
        }
        $others = array_slice($phrases, 1);
    }
    return $others;
}
/**
 * Returns the first phrase recorded under 'METRIC-PHRASE', or "" when the
 * context holds no such label. Requires analyse() to have run.
 *
 * @return string
 */
function getMetricPhrase()
{
    if (!in_array('METRIC-PHRASE', $this->precise['CONTEXT'])) {
        return "";
    }
    return $this->precise['CLARITY']['METRIC-PHRASE'][0];
}
/**
 * Returns 'MALE' or 'FEMALE' depending on which label is present in the
 * context; 'MALE' takes precedence when both are present. Returns "" when
 * neither is. Requires analyse() to have run.
 *
 * @return string
 */
function getGender()
{
    foreach (array('MALE', 'FEMALE') as $gender) {
        if (in_array($gender, $this->precise['CONTEXT'])) {
            return $gender;
        }
    }
    return "";
}
/**
 * Returns the first phrase recorded for the sports classifier found in the
 * context. When several classifier labels are present the last one wins.
 * Returns "" when there is none. Requires analyse() to have run.
 *
 * @return string
 */
function getSports()
{
    $sport = "";
    foreach ($this->precise['CONTEXT'] as $label) {
        if (!in_array($label, self::$dictionary['SPORTS_CLASSIFIER'])) {
            continue;
        }
        $sport = $this->precise['CLARITY'][$label][0];
    }
    return $sport;
}
/**
 * Returns the sports classifier label found in the context (the last one
 * when several are present), or "" when there is none.
 * Requires analyse() to have run.
 *
 * @return string
 */
function getSportsType()
{
    $type = "";
    foreach ($this->precise['CONTEXT'] as $label) {
        if (!in_array($label, self::$dictionary['SPORTS_CLASSIFIER'])) {
            continue;
        }
        $type = $label;
    }
    return $type;
}
/**
 * Returns the round label (an entry of the 'SPORTS-ROUND' index) found in the
 * context; the last one wins when several are present. Returns "" when the
 * context is missing or holds no round label.
 *
 * @return string
 */
function getSportsRound()
{
    $round = "";
    if (!isset($this->precise['CONTEXT']) || !is_array($this->precise['CONTEXT'])) {
        return $round;
    }
    foreach ($this->precise['CONTEXT'] as $label) {
        if (in_array($label, self::$dictionary['SPORTS-ROUND'])) {
            $round = $label;
        }
    }
    return $round;
}
/**
 * Returns the level label (an entry of the 'SPORTS-LEVEL' index) found in the
 * context; the last one wins when several are present. Returns "" when the
 * context is missing or holds no level label.
 *
 * @return string
 */
function getSportsLevel()
{
    $level = "";
    if (!isset($this->precise['CONTEXT']) || !is_array($this->precise['CONTEXT'])) {
        return $level;
    }
    foreach ($this->precise['CONTEXT'] as $label) {
        if (in_array($label, self::$dictionary['SPORTS-LEVEL'])) {
            $level = $label;
        }
    }
    return $level;
}
/**
 * Returns the static dictionary shared by all instances (classification
 * label => list of entries). It is filled by the __generate*() methods that
 * __initiate() calls, so it is empty until an instance has been analysed.
 */
static function getDictionary()
{
return self::$dictionary;
}
/**
 * Merges one n-gram analysis into $this->precise.
 *
 * Every label of $analysis is appended to 'CONTEXT' and its phrases stored
 * under 'CLARITY'. When the incoming label is a sports classifier, all
 * sports classifiers already present are first removed from both, so only
 * the most recently found classifier remains.
 *
 * @param array $analysis Label => list of phrases.
 * @return void
 */
function __updateContext($analysis)
{
    if (sizeof($analysis) <= 0) {
        return;
    }

    $sportLabels = self::$dictionary['SPORTS_CLASSIFIER'];
    foreach ($analysis as $label => $phrases) {
        if (in_array($label, $sportLabels)) {
            foreach ($this->precise['CONTEXT'] as $position => $existing) {
                if (!in_array($existing, $sportLabels)) {
                    continue;
                }
                unset($this->precise['CONTEXT'][$position]);
                unset($this->precise['CLARITY'][$existing]);
            }
        }
        $this->precise['CONTEXT'][] = $label;
        $this->precise['CLARITY'][$label] = $phrases;
    }
}
/**
 * Classifies every word of $sentence.
 *
 * A word receives one label per dictionary list that contains its lower-case
 * form. A word found in no list is labelled 'NUMBER' when it is numeric, or
 * when it contains ":" (checked first) or "-" and every piece between the
 * separators is numeric; otherwise it is labelled 'NOUN'.
 *
 * The result is keyed by the word itself, so a repeated word accumulates its
 * labels under a single key.
 *
 * @param string $sentence
 * @return array word => array('CLASSIFICATION' => list of labels)
 */
function __analyzeSentence($sentence)
{
    $result = array();
    foreach (self::getWords($sentence) as $token) {
        $needle = strtolower($token);
        $matched = false;
        foreach (self::$dictionary as $label => $entries) {
            if (!in_array($needle, $entries)) {
                continue;
            }
            $matched = true;
            $result[$token]['CLASSIFICATION'][] = $label;
        }
        if ($matched) {
            continue;
        }

        if (is_numeric($token)) {
            $kind = 'NUMBER';
        } else {
            // Only one separator is considered, ":" taking priority over "-".
            $separator = null;
            if (strpos($token, ":") !== false) {
                $separator = ":";
            } elseif (strpos($token, "-") !== false) {
                $separator = "-";
            }

            $kind = 'NOUN';
            if ($separator !== null) {
                $kind = 'NUMBER';
                foreach (explode($separator, $token) as $piece) {
                    if (!is_numeric($piece)) {
                        $kind = 'NOUN';
                        break;
                    }
                }
            }
        }
        $result[$token]['CLASSIFICATION'][] = $kind;
    }
    return $result;
}
/**
 * Flattens the classification labels of all words into one list.
 *
 * @param array $word_analyzed Result of __analyzeSentence().
 * @param bool  $analyze       When true (default) only labels listed in the
 *                             static 1-gram highlighters are kept; when false
 *                             every label is returned.
 * @return array List of labels in word order.
 */
function __createPrecise($word_analyzed,$analyze=true)
{
    $labels = array();
    foreach ($word_analyzed as $entry) {
        foreach ($entry['CLASSIFICATION'] as $label) {
            $labels[] = $label;
        }
    }

    if (!$analyze) {
        return $labels;
    }

    $highlighted = array();
    foreach ($labels as $label) {
        if (in_array($label, self::$gram1highlighters)) {
            $highlighted[] = $label;
        }
    }
    return $highlighted;
}
/**
 * Builds occurrence statistics for the analysed words.
 *
 * For every word, and for the first classification label of every word, the
 * tally records COUNT, the POSITION(s) in iteration order and PERCENT of the
 * total, rounded to three decimals. The total ('WORDCOUNT') is the number of
 * entries in $word_analyzed.
 *
 * @param array $word_analyzed Result of __analyzeSentence().
 * @return array Keys 'WORDCOUNT' and, when there are words, 'WORD' and 'WORDTYPE'.
 */
function __createWordTally($word_analyzed)
{
    $total = count($word_analyzed);
    $tally = array("WORDCOUNT" => $total);

    $position = 0;
    foreach ($word_analyzed as $word => $info) {
        // The same bookkeeping applies to the word and to its primary label.
        $buckets = array("WORD" => $word, "WORDTYPE" => $info['CLASSIFICATION'][0]);
        foreach ($buckets as $section => $key) {
            $seen = isset($tally[$section][$key]['COUNT']) ? $tally[$section][$key]['COUNT'] : 0;
            $tally[$section][$key]['COUNT'] = $seen + 1;
            $tally[$section][$key]['POSITION'][] = $position;
            $tally[$section][$key]['PERCENT'] = round(($tally[$section][$key]['COUNT'] / $total) * 100, 3);
        }
        $position++;
    }
    return $tally;
}
/**
 * Derives a coarse label for the whole line from its word tally.
 *
 *  - "LINE-BREAK"   no words;
 *  - "DECORATOR"    a single word longer than 5 characters made of one
 *                   repeated character (e.g. "======");
 *  - "SPORTS-ROUND" a single word whose primary label is a round label;
 *  - "HEADER"       several words, at least 50% of them header columns;
 *  - "SENTENCE"     at least 50% nouns;
 *  - "INFO"         anything else.
 *
 * Numeric words reach this method as integer array keys. They are cast to
 * string before the character check; indexing the integer directly yields
 * null for every position, which made every number of six or more digits
 * look like a decorator.
 *
 * @param array $wordTally Result of __createWordTally().
 * @return string
 */
function __inferGeneralContext($wordTally)
{
    $wordCount = isset($wordTally['WORDCOUNT']) ? $wordTally['WORDCOUNT'] : 0;
    if ($wordCount == 0) {
        return "LINE-BREAK";
    }

    if ($wordCount == 1 && isset($wordTally['WORD']) && is_array($wordTally['WORD'])) {
        foreach (array_keys($wordTally['WORD']) as $word) {
            $text = (string) $word;
            if (strlen($text) > 5 && count(array_unique(str_split($text))) == 1) {
                return "DECORATOR";
            }
        }
    }

    $types = array();
    if (isset($wordTally['WORDTYPE']) && is_array($wordTally['WORDTYPE'])) {
        $types = $wordTally['WORDTYPE'];
    }
    $nounPercent = isset($types['NOUN']['PERCENT']) ? $types['NOUN']['PERCENT'] : 0;
    $headerPercent = isset($types['HEADER-COL']['PERCENT']) ? $types['HEADER-COL']['PERCENT'] : 0;

    // Highest share held by any single round label.
    $roundLabels = isset(self::$dictionary['SPORTS-ROUND']) ? self::$dictionary['SPORTS-ROUND'] : array();
    $roundPercent = 0;
    foreach ($roundLabels as $roundLabel) {
        if (isset($types[$roundLabel]['PERCENT']) && $types[$roundLabel]['PERCENT'] > $roundPercent) {
            $roundPercent = $types[$roundLabel]['PERCENT'];
        }
    }

    if ($wordCount == 1 && $roundPercent == 100) {
        return "SPORTS-ROUND";
    }
    if ($wordCount > 1 && $headerPercent >= 50) {
        return "HEADER";
    }
    if ($nounPercent >= 50) {
        return "SENTENCE";
    }
    return "INFO";
}
/**
 * Single-word analysis: collects gender words and sports-classifier words.
 *
 * A word carrying the 'GENDER' label is filed under its second label. A word
 * whose first label is a sports classifier is filed under that label.
 *
 * @param array $word_analyzed Result of __analyzeSentence().
 * @return array Label => list of words.
 */
function __1GramAnalysis($word_analyzed)
{
    $found = array();
    foreach ($word_analyzed as $word => $info) {
        $labels = $info['CLASSIFICATION'];
        if (in_array('GENDER', $labels)) {
            $found[$labels[1]][] = $word;
        }
        if (in_array($labels[0], self::$dictionary['SPORTS_CLASSIFIER'])) {
            $found[$labels[0]][] = $word;
        }
    }
    return $found;
}
/**
 * Two-word phrase analysis.
 *
 * @param array $words Word list to analyse; the instance's own words are used
 *                     when this is not an array.
 * @return array Label => list of matching phrases.
 */
function __2GramAnalysis($words)
{
    return $this->__collectNGramMatches(2, $words, self::$gram2Highlighters);
}

/**
 * Three-word phrase analysis; see __2GramAnalysis().
 *
 * @param array $words
 * @return array Label => list of matching phrases.
 */
function __3GramAnalysis($words)
{
    return $this->__collectNGramMatches(3, $words, self::$gram3Highlighters);
}

/**
 * Four-word phrase analysis; see __2GramAnalysis().
 *
 * @param array $words
 * @return array Label => list of matching phrases.
 */
function __4GramAnalysis($words)
{
    return $this->__collectNGramMatches(4, $words, self::$gram4Highlighters);
}

/**
 * Five-word phrase analysis; see __2GramAnalysis().
 *
 * @param array $words
 * @return array Label => list of matching phrases.
 */
function __5GramAnalysis($words)
{
    return $this->__collectNGramMatches(5, $words, self::$gram5Highlighters);
}

/**
 * Shared implementation of the 2- to 5-gram analyses.
 *
 * Each n-gram is first looked up (lower-cased) in every dictionary list. Only
 * when no dictionary list contains it are the highlighters consulted: once
 * with the n-gram's label pattern (e.g. "NOUN NUMBER") and once with the
 * lower-cased phrase itself. A phrase matching both ways is recorded twice,
 * as before.
 *
 * @param int   $n            Number of words per n-gram.
 * @param array $words        Word list; falls back to $this->words when it is
 *                            not an array.
 * @param array $highlighters Label => list of patterns/phrases for this n.
 * @return array Label => list of matching phrases (original casing).
 */
function __collectNGramMatches($n, $words, $highlighters)
{
    if (!is_array($words)) {
        $words = $this->words;
    }

    $matches = array();
    $precise = $this->__nGramAnalysis($n, $words);
    if (!isset($precise['PRECIS']) || !is_array($precise['PRECIS'])) {
        return $matches;
    }

    foreach ($precise['PRECIS'] as $preci) {
        $pattern = implode(" ", $preci['DET']);
        $phrase = strtolower($preci['JWORDS']);

        $inDictionary = false;
        foreach (self::$dictionary as $label => $entries) {
            if (in_array($phrase, $entries)) {
                $inDictionary = true;
                $matches[$label][] = $preci['JWORDS'];
            }
        }
        if ($inDictionary) {
            continue;
        }

        foreach ($highlighters as $label => $candidates) {
            if (!is_array($candidates)) {
                continue;
            }
            if (in_array($pattern, $candidates)) {
                $matches[$label][] = $preci['JWORDS'];
            }
            if (in_array($phrase, $candidates)) {
                $matches[$label][] = $preci['JWORDS'];
            }
        }
    }
    return $matches;
}
/**
 * Builds every run of $n consecutive words and classifies each run.
 *
 * @param int   $n     Words per n-gram.
 * @param array $words Zero-indexed word list.
 * @return array Empty when there are fewer than $n words; otherwise
 *               'PRECIS' => list of array('JWORDS' => phrase,
 *               'DET' => labels of the phrase's words).
 *               With the debug flag set, the intermediate data is also stored
 *               in $this->precise under 'nGrams-<n>'.
 */
function __nGramAnalysis($n,$words)
{
    $precise = array();
    $total = count($words);
    $debugKey = 'nGrams-'.$n;
    $slot = 0;

    foreach ($words as $index => $word) {
        // No full n-gram starts at or after this position.
        if (($index + $n) > $total) {
            break;
        }

        $phrase = $word;
        for ($offset = 1; $offset < $n; $offset++) {
            $phrase .= ' '.$words[$index + $offset];
        }

        $analysed = $this->__analyzeSentence($phrase);
        $labels = $this->__createPrecise($analysed, false);

        $precise['PRECIS'][$slot]['JWORDS'] = $phrase;
        $precise['PRECIS'][$slot]['DET'] = $labels;

        if ($this->debug) {
            $this->precise[$debugKey]['SENTENCE'][] = $phrase;
            $this->precise[$debugKey]['ANALYSED'][] = $analysed;
            $this->precise[$debugKey]['PRECIS'][$slot]['JWORDS'] = $phrase;
            $this->precise[$debugKey]['PRECIS'][$slot]['DET'] = $labels;
        }
        $slot++;
    }
    return $precise;
}
/**
 * Registers the distance-unit words under the 'METRIC' dictionary label.
 *
 * @return void
 */
function __generateMETRICs()
{
    self::$dictionary['METRIC'] = array_map('strtolower', array('Meter', 'Mile'));
}
/**
 * Registers all gender words (male and female alike) under the 'GENDER'
 * dictionary label.
 *
 * @return void
 */
function __generateGENDERs()
{
    $genderWords = array(
        'Boy', 'Boys',
        'Girl', 'Girls',
        'Man', 'Men',
        'Woman', 'Women',
        'Male', 'Female',
    );
    self::$dictionary['GENDER'] = array_map('strtolower', $genderWords);
}
/**
 * Registers the 'MALE' and 'FEMALE' word lists in the dictionary and adds
 * both labels to the 1-gram highlighters.
 *
 * @return void
 */
function __generateGENDERCLASSIFIERs()
{
    $groups = array(
        'MALE'   => array('Boy', 'Boys', 'Man', 'Men', 'Male'),
        'FEMALE' => array('Girl', 'Girls', 'Woman', 'Women', 'Female'),
    );
    foreach ($groups as $label => $genderWords) {
        self::$dictionary[$label] = array_map('strtolower', $genderWords);
        self::$gram1highlighters[] = $label;
    }
}
/**
 * Registers the sports vocabulary: event families, rounds and levels.
 *
 * Every group is stored in the dictionary under its ParserUtility label
 * (entries lower-cased) and the label is added to the 1-gram highlighters.
 * Three index lists are stored as well:
 *   'SPORTS_CLASSIFIER' - labels of the event families,
 *   'SPORTS-ROUND'      - labels of the rounds,
 *   'SPORTS-LEVEL'      - labels of the levels.
 *
 * Fix: the League level was registered as a group but missing from the
 * 'SPORTS-LEVEL' index, so getSportsLevel() could never report it. The index
 * is now derived from the level table and therefore includes it.
 *
 * The registration order is significant: it determines the key order of the
 * dictionary and hence the order of the labels assigned to each word.
 *
 * @return void
 */
function __generateSPORTCLASSIFIERs()
{
    // Each row is array(label, phrases). Rows are pairs rather than a
    // label-keyed map so the registration sequence is kept exactly.
    $events = array(
        array(ParserUtility::$_RUNNING_, array('Dash', 'Marathon', 'Race Walking', 'Hurdles', 'Run', 'Steeplechase', 'Race', 'Race Walk')),
        array(ParserUtility::$_RELAY_, array('Relay', 'Sprint Medley', 'Distance Medley', 'Shuttle')),
        array(ParserUtility::$_THROWING_, array('Discus', 'Hammer', 'Hammer Throw', 'Javelin', 'Shot Put', 'Weight Throw')),
        array(ParserUtility::$_JUMPING_, array('High Jump', 'Long Jump', 'Pole Vault', 'Triple Jump')),
        array(ParserUtility::$_MIXED_EVENT_, array('Decathlon', 'Heptathlon', 'Pentathlon')),
        // The team families have no single-word/phrase entries here.
        array(ParserUtility::$_JUMPING_TEAM_, array()),
        array(ParserUtility::$_THROWING_TEAM_, array()),
    );
    foreach ($events as $group) {
        self::$dictionary[$group[0]] = array_map('strtolower', $group[1]);
        self::$gram1highlighters[] = $group[0];
    }
    self::$dictionary['SPORTS_CLASSIFIER'] = array(
        ParserUtility::$_RUNNING_,
        ParserUtility::$_JUMPING_,
        ParserUtility::$_THROWING_,
        ParserUtility::$_RELAY_,
        ParserUtility::$_JUMPING_TEAM_,
        ParserUtility::$_THROWING_TEAM_,
        ParserUtility::$_MIXED_EVENT_,
    );

    $rounds = array(
        array(ParserUtility::$_ROUND_FINAL_, array('Final', 'Finals')),
        array(ParserUtility::$_ROUND_SEMI_FINAL_, array('Semi', 'Semis', 'Semi-Final', 'Semi-Finals', 'SemiFinal', 'SemiFinals')),
        array(ParserUtility::$_ROUND_PRILIMINARY_, array('Priliminary', 'Priliminaries', 'Preliminaries', 'Prilim', 'Prilims')),
        array(ParserUtility::$_ROUND_TRIAL_, array('Trials', 'Trial')),
    );
    $roundLabels = array();
    foreach ($rounds as $group) {
        self::$dictionary[$group[0]] = array_map('strtolower', $group[1]);
        self::$gram1highlighters[] = $group[0];
        $roundLabels[] = $group[0];
    }
    self::$dictionary['SPORTS-ROUND'] = $roundLabels;

    $levels = array(
        array(ParserUtility::$_LEVEL_OPEN_, array('Open')),
        array(ParserUtility::$_LEVEL_FROS_SOPH_, array('FROSH / SOPH', 'FROSH/ SOPH', 'FROSH /SOPH', 'FROSH/SOPH', 'Freshman/Sophomore')),
        array(ParserUtility::$_LEVEL_FRESHMAN_, array('Freshman', 'Freshwoman', 'Freshmen', 'Freshwomen', 'FROSH')),
        array(ParserUtility::$_LEVEL_SOPHOMORE_, array('Sophomore', 'SOPH')),
        array(ParserUtility::$_LEVEL_JUNIOR_, array('Junior')),
        array(ParserUtility::$_LEVEL_SENIOR_, array('Senior')),
        array(ParserUtility::$_LEVEL_UNIVARSITY_, array('Varsity', 'Individual Varsity')),
        array(ParserUtility::$_LEVEL_CONFERENCE_, array('Conference')),
        array(ParserUtility::$_LEVEL_LEAGUE_, array('League')),
        array(ParserUtility::$_LEVEL_JV_, array('Junior Varsity', 'Junior Varsi', 'Jr. Varsity', 'JV')),
        array(ParserUtility::$_LEVEL_SUB_MIDGET_, array('Sub Midget', 'Sub-Midget')),
        array(ParserUtility::$_LEVEL_MIDGET_, array('Midget')),
        array(ParserUtility::$_LEVEL_JUNIOR_WEIGH_, array('Junior Weigh', 'Jr. Weigh', 'Jr Weigh')),
        array(ParserUtility::$_LEVEL_NOVICE_, array('Novice')),
        array(ParserUtility::$_LEVEL_CLASS_, array('Class')),
        array(ParserUtility::$_LEVEL_DIVISION_, array('Division')),
        array(ParserUtility::$_LEVEL_CROSSOVER_, array('Crossover')),
        array(ParserUtility::$_LEVEL_INDIV_TEAM_, array('Indiv. Team', 'Individual Team')),
        array(ParserUtility::$_LEVEL_JR_SR_, array(
            'Junior-Senior', 'Junior- Senior', 'Junior -Senior', 'Junior - Senior',
            'Jr.-Sr.', 'Jr.- Sr.', 'Jr. -Sr.', 'Jr. - Sr.',
        )),
        array(ParserUtility::$_LEVEL_UNSEEDED_, array('Unseeded')),
        array(ParserUtility::$_LEVEL_CHAMPIONSHIP_, array('Championship')),
        array(ParserUtility::$_LEVEL_ELITE_, array('Elite', 'Elit')),
        array(ParserUtility::$_LEVEL_SMALL_SCHOOL_, array('Small Schools', 'Small School')),
        array(ParserUtility::$_LEVEL_LARGE_SCHOOL_, array('Large Schools', 'Large School')),
        array(ParserUtility::$_LEVEL_INVITATIONAL_, array('Invitational')),
        array(ParserUtility::$_LEVEL_PSAL_, array('PSAL')),
    );
    $levelLabels = array();
    foreach ($levels as $group) {
        self::$dictionary[$group[0]] = array_map('strtolower', $group[1]);
        self::$gram1highlighters[] = $group[0];
        $levelLabels[] = $group[0];
    }
    self::$dictionary['SPORTS-LEVEL'] = $levelLabels;
}
/**
 * Registers the column titles that identify a table header line under the
 * 'HEADER-COL' dictionary label.
 *
 * @return void
 */
function __generateHEADERCOLs()
{
    $columnTitles = array(
        'Name', 'Year', 'Seed', 'School', 'Finals', 'Prelims',
        'H#', 'Comp#', 'Points', 'Time', 'Team',
    );
    self::$dictionary['HEADER-COL'] = array_map('strtolower', $columnTitles);
}
/**
 * Registers the 2-gram highlighters: label patterns for generic two-word
 * phrases and metric phrases, plus one literal relay phrase.
 *
 * NOTE(review): the relay entry uses the literal key 'RELAY', while the
 * 4-gram table uses ParserUtility::$_RELAY_ - confirm both resolve to the
 * same label.
 *
 * @return void
 */
function __generate2GRAMIDENTIFIERSs()
{
    self::$gram2Highlighters['2W-PHRASE'] = array_map('strtoupper', array('NOUN NOUN', 'NOUN NUMBER'));
    self::$gram2Highlighters['METRIC-PHRASE'] = array_map('strtoupper', array('NUMBER METRIC', 'NOUN METRIC'));
    self::$gram2Highlighters['RELAY'] = array_map('strtolower', array('Sprint Medley'));
}
/**
 * Registers the label patterns that mark a generic three-word phrase.
 *
 * @return void
 */
function __generate3GRAMIDENTIFIERSs()
{
    $patterns = array('NOUN NOUN NOUN', 'NOUN NOUN NUMBER', 'NUMBER NOUN NOUN');
    self::$gram3Highlighters['3W-PHRASE'] = array_map('strtoupper', $patterns);
}
/**
 * Registers the 4-gram highlighters: a placeholder '4W-PHRASE' entry holding
 * a single empty pattern, and one literal relay phrase.
 *
 * @return void
 */
function __generate4GRAMIDENTIFIERSs()
{
    self::$gram4Highlighters['4W-PHRASE'] = array(strtoupper(''));
    self::$gram4Highlighters[ParserUtility::$_RELAY_] = array_map('strtolower', array('Hurdles 2 Person Rel'));
}
/**
 * Registers the 5-gram highlighters: a placeholder '5W-PHRASE' entry holding
 * a single empty pattern, and the literal team-event phrases.
 *
 * Fix: the placeholder was stored under '4W-PHRASE' (copied from the 4-gram
 * table); getOtherPhrases() looks for '5W-PHRASE' for five-word phrases.
 *
 * @return void
 */
function __generate5GRAMIDENTIFIERSs()
{
    self::$gram5Highlighters['5W-PHRASE'] = array(strtoupper(''));

    $teamJumps = array(
        'Long Jump 2 Person Rel',
        'Triple Jump 2 Person Rel',
        'High Jump 2 Person Rel',
        'Pole Vault 2 Person Rel',
    );
    self::$gram5Highlighters[ParserUtility::$_JUMPING_TEAM_] = array_map('strtolower', $teamJumps);

    $teamThrows = array(
        'Shot Put 2 Person Rel',
        'Discus Throw 2 Person Re',
    );
    self::$gram5Highlighters[ParserUtility::$_THROWING_TEAM_] = array_map('strtolower', $teamThrows);
}
/**
 * Registers the 'NOUN' dictionary list; it currently holds only an empty
 * string.
 *
 * @return void
 */
function __generateNOUNs()
{
    self::$dictionary['NOUN'] = array('');
}
/**
 * Registers the 'PRONOUN' dictionary list; it currently holds only an empty
 * string.
 *
 * @return void
 */
function __generatePRONOUNs()
{
    self::$dictionary['PRONOUN'] = array('');
}
/**
 * Registers the 'VERB' dictionary list; it currently holds only an empty
 * string.
 *
 * @return void
 */
function __generateVERBs()
{
    self::$dictionary['VERB'] = array('');
}
/**
 * Registers the 'ADJECTIVE' dictionary list; it currently holds only an
 * empty string.
 *
 * @return void
 */
function __generateADJECTIVEs()
{
    self::$dictionary['ADJECTIVE'] = array('');
}
/**
 * Registers the 'ADVERB' dictionary list; it currently holds only an empty
 * string.
 *
 * @return void
 */
function __generateADVERBs()
{
    self::$dictionary['ADVERB'] = array('');
}
/**
 * Registers the preposition list under the 'PREPOSITION' dictionary label.
 * All entries are already lower-case.
 *
 * @return void
 */
function __generatePREPOSITIONs()
{
    self::$dictionary['PREPOSITION'] = array(
        'of', 'with', 'at', 'from', 'into',
        'during', 'including', 'until', 'against', 'among',
        'throughout', 'despite', 'towards', 'upon', 'concerning',
        'to', 'in', 'for', 'on', 'by',
        'about', 'like', 'through', 'over', 'before',
        'between', 'after', 'since', 'without', 'under',
        'within', 'along', 'following', 'across', 'behind',
        'beyond', 'plus', 'except', 'but', 'up',
        'out', 'around', 'down', 'off', 'above',
        'near',
    );
}
/**
 * Registers the 'CONJUNCTION' dictionary list; it currently holds only an
 * empty string.
 *
 * @return void
 */
function __generateCONJUNCTIONs()
{
    self::$dictionary['CONJUNCTION'] = array('');
}
/**
 * Registers the 'INTERJECTION' dictionary list; it currently holds only an
 * empty string.
 *
 * @return void
 */
function __generateINTERJECTIONs()
{
    self::$dictionary['INTERJECTION'] = array('');
}
/**
 * Splits a sentence into words on single space characters.
 *
 * Every piece is trimmed. With $trim enabled (default) pieces that are empty
 * after trimming - e.g. those produced by consecutive spaces - are dropped;
 * with $trim disabled they are kept as empty strings.
 *
 * The punctuation-stripping branch that used to live here was guarded by
 * `false &&` and could never run; it and the unused counter were removed.
 * Results are unchanged.
 *
 * @param string $sentence
 * @param bool   $trim     Drop empty pieces when true.
 * @return array Zero-indexed list of words.
 */
static function getWords($sentence, $trim=true)
{
    $words = array();
    foreach (explode(" ", $sentence) as $piece) {
        $piece = trim($piece);
        if ($trim && $piece === '') {
            continue;
        }
        $words[] = $piece;
    }
    return $words;
}
}
?>