kris@sentientgeeks.com 01ed2f0b15 initial Commit
2021-02-09 10:26:46 +05:30

610 lines
26 KiB
PHP

<?php
class DataParser
{
// When true, __processLines() echoes an HTML trace for every input line.
// No method of this class changes it; it has to be edited here to enable tracing.
private $debug = false;
// Context values handed to the constructor. They are stored as given and are
// not read by any method of this class.
private $meet_name = "";
private $meet_id = "";
private $gender = "";
private $schoolId = "";
private $seasonId = "";
private $seasonYear = "";
// Input lines collected by setRawInputData(), in input order.
private $rawDataLines = array();
// Line objects collected by __getOneLineObject() while __processLines() runs;
// returned by getData() and consumed by getFormatedData().
private $processedDataLines = array();
/**
 * Stores the context values for this parser instance.
 *
 * Each argument is copied verbatim into the private property of the same
 * name; nothing is validated or converted here.
 *
 * @param mixed $meet_name  Stored in $this->meet_name.
 * @param mixed $meet_id    Stored in $this->meet_id.
 * @param mixed $gender     Stored in $this->gender.
 * @param mixed $schoolId   Stored in $this->schoolId.
 * @param mixed $seasonId   Stored in $this->seasonId.
 * @param mixed $seasonYear Stored in $this->seasonYear.
 */
function __construct($meet_name, $meet_id, $gender, $schoolId, $seasonId, $seasonYear)
{
    // Season context.
    $this->seasonYear = $seasonYear;
    $this->seasonId   = $seasonId;

    // School and gender context.
    $this->schoolId = $schoolId;
    $this->gender   = $gender;

    // Meet context.
    $this->meet_id   = $meet_id;
    $this->meet_name = $meet_name;
}
/**
 * Splits raw report text into lines and queues them for process().
 *
 * "&amp;" is decoded to "&", line endings are normalised to "\n", the text
 * is trimmed as a whole and every resulting line is appended to
 * $this->rawDataLines. Lines are appended, not replaced, so calling this
 * method more than once accumulates input. Non-string input, and input that
 * is empty after trimming, adds nothing.
 *
 * @param mixed $data Raw report text; anything that is not a string is ignored.
 * @return void
 */
function setRawInputData($data)
{
    if (!is_string($data)) {
        return;
    }

    $text = str_replace("&amp;", "&", $data);

    // Normalise CRLF / CR line endings. Without this every line of a
    // Windows-style input keeps a trailing "\r", which is then counted by
    // length-based logic such as strlen($ln) in __processLines().
    $text = str_replace(array("\r\n", "\r"), "\n", $text);

    $text = trim($text);
    if ($text === '') {
        // Nothing but whitespace: there is no line worth queuing.
        return;
    }

    foreach (explode("\n", $text) as $line) {
        $this->rawDataLines[] = $line;
    }
}
/**
 * Returns the line objects collected so far.
 *
 * @return array Contents of $this->processedDataLines; an empty array until
 *               lines have been processed.
 */
function getData()
{
    return $this->processedDataLines;
}
/**
 * Converts the processed line objects into one nested array.
 *
 * Top-level keys of the result:
 *  - LICENCED-TO, SUB-LICENSE, LICENSE-DATE : taken from a "license-header" line
 *  - EVENT-NAME, EVENT-DATE                 : taken from a "school-report-head" line
 *  - SCHOOL-NAME                            : taken from a "school-name-head" line
 *  - REPORT                                 : list with one entry per "report-data"
 *                                             line (always present, possibly empty)
 *
 * Values missing from a line's composed data come out as null.
 *
 * @return array
 */
function getFormatedData()
{
    $return = array();
    $parsedData = $this->processedDataLines;

    if (is_array($parsedData) && sizeof($parsedData) > 0) {
        foreach ($parsedData as $data) {
            $lineType = $data->getType();
            $dispData = $data->getComposedData();

            // switch compares loosely, exactly like the == chain it replaces.
            // The @ only silences "undefined key" notices for absent values.
            switch ($lineType) {
                case "license-header":
                    $return['LICENCED-TO'] = @$dispData['LICENCE TO'];
                    $return['SUB-LICENSE'] = @$dispData['SUB-LICENSE'];
                    $return['LICENSE-DATE'] = @$dispData['LICENSE DATE'];
                    break;

                case "school-report-head":
                    $return['EVENT-NAME'] = @$dispData['EVENT NAME'];
                    $return['EVENT-DATE'] = @$dispData['EVENT DATE'];
                    break;

                case "school-name-head":
                    $return['SCHOOL-NAME'] = @$dispData['SCHOOL NAME'];
                    break;

                case "report-data":
                    $return['REPORT'][] = $this->__buildReportEntry($data, $dispData);
                    break;
            }
        }
    }

    if (!isset($return['REPORT'])) {
        $return['REPORT'] = array();
    }

    return $return;
}

/**
 * Builds one REPORT entry (HEADER, HAS-RECORDS, COLUMNS/DATA or ERROR,
 * RAW-RECORDS) from a "report-data" line object.
 *
 * @param object $data     Line object; must provide getReportMatrix().
 * @param mixed  $dispData Composed data of that line; 'HEADER' and
 *                         'HEADER-ANALYSIS' are read from it.
 * @return array
 */
function __buildReportEntry($data, $dispData)
{
    $reportMatrix = $data->getReportMatrix();
    $headerText = @$dispData['HEADER'];
    // NOTE(review): a missing 'HEADER-ANALYSIS' still fails on the next call,
    // as it did before this refactoring - confirm it is always set upstream.
    $headerAnalysis = @$dispData['HEADER-ANALYSIS'];
    $leadPhrase = $headerAnalysis->getLeadPhrase();

    // The event name is the header text without its lead phrase and without
    // gender / level words.
    $eventName = trim(str_replace($leadPhrase, "", $headerText));
    $eventName = trim($this->__cleanUpEventname($eventName));
    $eventLevel = $leadPhrase;
    if ($eventName === '') {
        // Nothing left after clean-up: the lead phrase itself is the name.
        $eventName = $leadPhrase;
        $eventLevel = '';
    }

    $report = array(
        'HEADER' => array(
            'FULL-TEXT'      => $headerText,
            'EVENT-NAME'     => $eventName,
            'EVENT-LEVEL'    => $eventLevel,
            'GENDER'         => $headerAnalysis->getGender(),
            'SPORTS'         => $headerAnalysis->getSports(),
            'SPORT-TYPE'     => $headerAnalysis->getSportsType(),
            'SPORT-LEVEL'    => $headerAnalysis->getSportsLevel(),
            // NOTE(review): carries the same value as SPORT-LEVEL, unchanged
            // from the previous implementation - confirm that is intended.
            'SPORT-LEVEL-SQ' => $headerAnalysis->getSportsLevel(),
            'METRIC'         => $headerAnalysis->getMetricPhrase(),
            'ADDLN-NOTE'     => $headerAnalysis->getOtherPhrases(),
        ),
    );

    if ($reportMatrix->isUsable()) {
        $report['HAS-RECORDS'] = true;

        $columns = array("SL");
        foreach ($reportMatrix->getHeaders() as $head) {
            $columns[] = $head;
        }
        $columns[] = "ROUND";
        $columns[] = "REMARKS";
        $report['COLUMNS'] = $columns;

        $sportType = $report['HEADER']['SPORT-TYPE'];
        $isTeamEvent = ($sportType == ParserUtility::$_RELAY_
            || $sportType == ParserUtility::$_THROWING_TEAM_
            || $sportType == ParserUtility::$_JUMPING_TEAM_);

        $rows = array();
        $sawMatrixRow = false;
        foreach ($reportMatrix->getMatrix() as $matrix) {
            $sawMatrixRow = true;
            $row = $this->__buildReportRow($matrix, $columns, $isTeamEvent);
            if ($row !== null) {
                $rows[] = $row;
            }
        }
        // DATA is placed before RAW-RECORDS only when the matrix had rows;
        // this keeps the key order of the result identical to before.
        if ($sawMatrixRow) {
            $report['DATA'] = $rows;
        }
    } else {
        $report['HAS-RECORDS'] = false;
        $report['ERROR'] = $reportMatrix->getProblem();
    }

    $report['RAW-RECORDS'] = implode("\n", $reportMatrix->getReportLines());

    // Every report exposes a DATA list, even when it holds no records.
    if (!isset($report['DATA'])) {
        $report['DATA'] = array();
    }

    return $report;
}

/**
 * Builds one DATA row of a report from one matrix row.
 *
 * @param mixed $matrix      Matrix row: column name => array with 'COMBINED',
 *                           'DATA' and optionally 'FIRST-NAME', 'LAST-NAME',
 *                           'ROLL-NO'; also 'RAW-LINE', 'MARK' and 'REMARKS'.
 * @param array $columns     Column names of the report, in output order.
 * @param bool  $isTeamEvent True when the "school" column must be stripped of
 *                           quoted single letters.
 * @return array|null The row, or null when all column values together are blank.
 */
function __buildReportRow($matrix, $columns, $isTeamEvent)
{
    $row = array();
    $combinedValues = '';

    foreach ($columns as $col) {
        if ($col !== 'REMARKS') {
            $row[$col]['RAW'] = @$matrix[$col]['COMBINED'];
            $row[$col]['FINAL'] = @$matrix[$col]['DATA'];
            if (isset($matrix[$col]['FIRST-NAME'])) {
                $row[$col]['FIRST-NAME'] = @$matrix[$col]['FIRST-NAME'];
                $row[$col]['LAST-NAME'] = @$matrix[$col]['LAST-NAME'];
                $row[$col]['ROLL-NO'] = @$matrix[$col]['ROLL-NO'];
            }
            $combinedValues .= @$matrix[$col]['DATA'];
        }

        if ($col === 'SL') {
            // A serial number that is not numeric is blanked out.
            $serial = trim(@$matrix[$col]['DATA']);
            if ($serial !== '' && is_numeric($serial) === false) {
                $row[$col]['RAW'] = '';
                $row[$col]['FINAL'] = '';
            }
        }

        if ($isTeamEvent && trim(strtolower($col)) === 'school') {
            $row[$col]['FINAL'] = $this->__removeQuotedAlphabets(@$matrix[$col]['DATA']);
        }
    }

    if (strlen(trim($combinedValues)) == 0) {
        // No column carried any value: the row is dropped.
        return null;
    }

    $row['UN-FORMATED']['FINAL'] = @$matrix['RAW-LINE']['DATA'];

    $remarks = trim(@$matrix['MARK']['DATA'].' '.@$matrix['REMARKS']['DATA']);
    $row['REMARKS']['FINAL'] = $remarks;
    if ($remarks !== '') {
        $finesse = $this->__extractNumericFromRemarks($remarks);
        if ($finesse !== false) {
            $row['FINESSE'] = $finesse;
        }
    }

    return $row;
}
/**
 * Parses the lines queued with setRawInputData().
 *
 * Delegates to __processLines(); the outcome is available afterwards through
 * getData() and getFormatedData().
 *
 * @return void
 */
function process()
{
    $this->__processLines();
}
/**
 * Walks the queued raw lines through a small state machine and collects one
 * RecordLine per recognised section in $this->processedDataLines.
 *
 * Header phase (until $reportPartStarts is set). Lines whose general context
 * is DECORATOR or LINE-BREAK are skipped; the others advance the state:
 *   ''  ->  'license-header'  ->  'school-report-head'
 *       ->  'school-name-head'  ->  'result-label'
 * While in 'license-header', lines starting with one of
 * ParserUtility::$terms_headerLicenseTos are added to the license record
 * instead of advancing the state.
 *
 * Report phase. Blank / LINE-BREAK lines open a 'line-break' record, or
 * extend the current one. The first other line after a line break opens a
 * 'report-data' record; following lines are fed to that record's report
 * matrix as decorator length, column headers or data. Non-blank lines that
 * arrive right after 'result-label', before any blank line, are ignored.
 *
 * Every record is appended to $this->processedDataLines when the next one is
 * opened; the last one is flushed after the loop.
 *
 * @return void
 */
function __processLines()
{
    if ($this->debug) echo 'starting<br>';

    $previousStatus = '';
    $currentStatus = 'license-header';
    $lineObj = null;
    $reportPartStarts = false;

    foreach ($this->rawDataLines as $ln) {
        if ($this->debug) echo '<br> <b>PROCESSING</b> ............. '.$ln;
        if ($this->debug) echo '<br> previousStatus ......... '.$previousStatus;

        $analysis = new Lexicon($ln);
        $analysis->preAnalyse();
        if ($this->debug) { echo '<br> <u>ANALYSIS</u> ::: <div style="max-height:100px; overflow:auto; border:thin solid #ccc; margin-left:60px; margin-right:60px;">'; print_r($analysis); echo "</div>"; }

        // Read once, before analyse() may be called further down.
        $context = $analysis->getGeneralContext();
        // "Content" is anything that is neither a decorator rule nor a break.
        $isContent = ($context != 'DECORATOR' && $context != 'LINE-BREAK');

        if (!$reportPartStarts) {
            if ($previousStatus == '') {
                $currentStatus = 'license-header';
                if ($isContent && ParserUtility::startsWithOneOfThem(trim($ln), ParserUtility::$terms_headerStartsWith)) {
                    $lineObj = $this->__getOneLineObject($currentStatus, $lineObj);
                    $lineObj->setOriginalContent($ln);
                    $lineObj->process();
                }
                $previousStatus = 'license-header';
            } else if ($previousStatus == 'license-header') {
                if ($isContent && ParserUtility::startsWithOneOfThem(trim($ln), ParserUtility::$terms_headerLicenseTos)) {
                    // The first line opens the license record only when it
                    // starts with a known header term. If it did not, there
                    // is no record yet and the call below used to fail on
                    // null; open the record here instead.
                    if ($lineObj === null) {
                        $lineObj = $this->__getOneLineObject('license-header', $lineObj);
                    }
                    $lineObj->setOriginalContent($ln);
                    $lineObj->process();
                } elseif ($isContent) {
                    $currentStatus = 'school-report-head';
                    $lineObj = $this->__getOneLineObject($currentStatus, $lineObj);
                    $lineObj->setOriginalContent($ln);
                    $lineObj->process();
                    $previousStatus = 'school-report-head';
                }
            } else if ($previousStatus == 'school-report-head') {
                if ($isContent) {
                    $currentStatus = 'school-name-head';
                    $lineObj = $this->__getOneLineObject($currentStatus, $lineObj);
                    $lineObj->setOriginalContent($ln);
                    $lineObj->process();
                    $previousStatus = 'school-name-head';
                }
            } else if ($previousStatus == 'school-name-head') {
                if ($isContent) {
                    $currentStatus = 'result-label';
                    $lineObj = $this->__getOneLineObject($currentStatus, $lineObj);
                    $lineObj->setOriginalContent($ln);
                    $lineObj->process();
                    $previousStatus = 'result-label';
                    // Header is complete; everything after this is report data.
                    $reportPartStarts = true;
                    $currentStatus = '';
                }
            }
        } else {
            // 'decorator-start' is never assigned by this method; the check
            // is kept so the accepted states stay exactly as before.
            $afterBreak = ($previousStatus == 'line-break' || $previousStatus == 'decorator-start');

            if (trim($ln) === '' || $context == 'LINE-BREAK') {
                if (!$afterBreak) {
                    // First blank line after a section: open a new record.
                    $currentStatus = 'line-break';
                    $lineObj = $this->__getOneLineObject($currentStatus, $lineObj);
                }
                // Consecutive blank lines extend the current record.
                $lineObj->setOriginalContent($ln);
                $lineObj->process();
                $previousStatus = 'line-break';
            } else {
                if ($afterBreak) {
                    // First line of a report block.
                    $currentStatus = "report-data";
                    $lineObj = $this->__getOneLineObject($currentStatus, $lineObj);
                    $lineObj->setOriginalContent($ln);
                    $lineObj->process();
                    $reportMatrix = $lineObj->getReportMatrix();
                    if ($context == 'HEADER') {
                        $reportMatrix->parseHeaders($ln);
                        $lineObj->setComposedData("SPORT-ROUND", $reportMatrix->getSportsRound());
                    }
                    $previousStatus = "report-data";
                } elseif ($previousStatus == 'report-data') {
                    $reportMatrix = $lineObj->getReportMatrix();
                    $lineObj->setComposedData("SPORT-ROUND", $reportMatrix->getSportsRound());

                    if ($context == 'DECORATOR') {
                        $reportMatrix->setDecoratorLength(strlen($ln));
                    } else if ($context == 'HEADER') {
                        $reportMatrix->parseHeaders($ln);
                    } else if ($context == 'SPORTS-ROUND') {
                        $reportMatrix->parseData($ln);
                        $analysis->analyse();
                        $reportMatrix->setSportsLevel($analysis->getSportsRound());
                        // NOTE(review): this key is "SPORTS-ROUND" while the
                        // other writes use "SPORT-ROUND" - left unchanged,
                        // confirm which key the consumers expect.
                        $lineObj->setComposedData("SPORTS-ROUND", $reportMatrix->getSportsRound());
                    } else {
                        $reportMatrix->parseData($ln);
                    }
                }
            }
        }

        if ($this->debug) echo '<br> <b>currentStatus</b> ......... '.$currentStatus;
        if ($this->debug) echo '<br> <b>previousStatus</b> ......... '.$previousStatus;
        if ($this->debug) { echo '<br> <u>LINE OBJECT</u> ::: <div style="max-height:100px; overflow:auto; border:thin solid #ff00ff; margin-left:60px; margin-right:60px;">'; print_r($lineObj); echo "</div>"; }
        if ($this->debug) echo '<br><br><h3 style="color:red"> :-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-::-: </h3><br>';
    }

    // Flush the record that was still open when the input ended.
    $this->__getOneLineObject("", $lineObj);
}
/**
 * Closes the record in progress and opens a new one.
 *
 * The previous record, when there is one, is appended to
 * $this->processedDataLines; a fresh RecordLine of the requested type is
 * returned in every case.
 *
 * @param string      $currentStatus      Type passed to the new RecordLine.
 * @param object|null $previousLineObject Record to store, or null for none.
 * @return RecordLine
 */
function __getOneLineObject($currentStatus, $previousLineObject)
{
    $hasPrevious = ($previousLineObject !== null);
    if ($hasPrevious) {
        $this->processedDataLines[] = $previousLineObject;
    }

    $freshLine = new RecordLine($currentStatus);
    return $freshLine;
}
/**
 * Removes single letters wrapped in single quotes (for example 'A' or 'b')
 * from a text.
 *
 * A quoted letter is removed only when it has a space on at least one side;
 * it is replaced by a single space and the text is trimmed after every
 * replacement. Letters are handled from a to z, upper case before lower case.
 *
 * @param string $text Text to clean.
 * @return string The trimmed text without the quoted letters.
 */
function __removeQuotedAlphabets($text)
{
    // Padding around the quoted letter, in the order the replacements run:
    // space on both sides, space after only, space before only.
    $paddings = array(
        array(' ', ' '),
        array('', ' '),
        array(' ', ''),
    );

    foreach (range('a', 'z') as $letter) {
        $quotedForms = array("'".strtoupper($letter)."'", "'".$letter."'");

        foreach ($paddings as $padding) {
            foreach ($quotedForms as $quoted) {
                $needle = $padding[0].$quoted.$padding[1];
                $text = trim(str_replace($needle, " ", $text));
            }
        }
    }

    return trim($text);
}
/**
 * Strips gender words and sports-level words from an event name.
 *
 * The terms come from Lexicon::getDictionary(): every entry of 'GENDER',
 * plus every entry of each list named in 'SPORTS-LEVEL'. One trailing
 * character out of "/", "-", "[", "(", "{" is removed first.
 *
 * Normal mode removes a term only where it has a space on at least one
 * side, in its ucwords, upper-case and as-given spelling. If a term can
 * still be found afterwards (case-insensitively), or the name still ends in
 * one of the characters above, the method calls itself in "specific" mode,
 * which removes every occurrence of every term regardless of case and
 * position.
 *
 * NOTE(review): specific mode removes terms inside longer words as well -
 * confirm this is wanted for the dictionary in use.
 *
 * @param string $eventName Event name to clean.
 * @param bool   $specific  True to remove terms wherever they occur.
 * @return string The cleaned, trimmed event name.
 */
function __cleanUpEventname($eventName, $specific=false)
{
    $eventName = trim($eventName);
    $endCharacters = array("/","-","[","(","{");
    $dictionary = Lexicon::getDictionary();

    if (ParserUtility::endsWithOneOfThem($eventName, $endCharacters)) {
        $eventName = substr($eventName, 0, -1);
    }

    // All terms in processing order: genders first, then each level list.
    $terms = array();
    foreach ($dictionary['GENDER'] as $gender) {
        $terms[] = $gender;
    }
    foreach ($dictionary['SPORTS-LEVEL'] as $levels) {
        foreach ($dictionary[$levels] as $level) {
            $terms[] = $level;
        }
    }

    // Padding tried in normal mode, in order: space on both sides, space
    // after only, space before only.
    $paddings = array(
        array(' ', ' '),
        array('', ' '),
        array(' ', ''),
    );

    foreach ($terms as $term) {
        if ($specific) {
            // Case-insensitive on purpose: the re-check below uses stripos(),
            // so the removal has to catch every spelling that check can
            // find. Removing only three case variants left other spellings
            // in place and made the recursion below run forever.
            $eventName = trim(str_ireplace($term, "", $eventName));
        } else {
            $variants = array(ucwords($term), strtoupper($term), $term);
            foreach ($paddings as $padding) {
                foreach ($variants as $variant) {
                    $eventName = trim(str_replace($padding[0].$variant.$padding[1], " ", $eventName));
                }
            }
        }
    }
    $eventName = trim($eventName);

    foreach ($terms as $term) {
        // An empty term is skipped: stripos() reports a match at offset 0 for
        // an empty needle on PHP 8, which would recurse without end.
        if ((string) $term !== '' && stripos($eventName, $term) !== false) {
            $eventName = $this->__cleanUpEventname($eventName, true);
        }
    }

    if (ParserUtility::endsWithOneOfThem($eventName, $endCharacters) !== false) {
        $eventName = $this->__cleanUpEventname($eventName, true);
    }

    return trim($eventName);
}
/**
 * Extracts the single value that accompanies the word QUALIFIED in a remark.
 *
 * The remark must contain "QUALIFIED" (upper case) and, once that word is
 * removed and the rest trimmed, exactly one token without spaces. That token
 * is accepted when it is numeric as it stands, or when it contains "-" and
 * ParserUtility::numerizeLength() turns it into a number, or when it
 * contains ":" and ParserUtility::numerizeTime() turns it into a number.
 * A token containing "-" is never tried as a time.
 *
 * @param string $remarks Remark text.
 * @return array|false array("RAW" => token, "FINAL" => numeric value), or
 *                     false when no value can be extracted.
 */
function __extractNumericFromRemarks($remarks)
{
    if (strpos($remarks, 'QUALIFIED') === false) {
        return false;
    }

    $remrk = trim(str_replace('QUALIFIED', '', $remarks));
    if (strpos($remrk, ' ') !== false) {
        // More than one token left: not a plain "QUALIFIED <value>" remark.
        return false;
    }

    if (is_numeric($remrk)) {
        return array("RAW" => $remrk, "FINAL" => $remrk);
    }

    // strpos() returns 0 for a match at the very start, so the result must be
    // compared with false; used as a boolean it hid a leading "-" or ":".
    if (strpos($remrk, "-") !== false) {
        $remrkF = ParserUtility::numerizeLength($remrk);
        if (is_numeric($remrkF)) {
            return array("RAW" => $remrk, "FINAL" => $remrkF);
        }
    } elseif (strpos($remrk, ":") !== false) {
        $remrkF = ParserUtility::numerizeTime($remrk);
        if (is_numeric($remrkF)) {
            return array("RAW" => $remrk, "FINAL" => $remrkF);
        }
    }

    return false;
}
}
?>