Commit 9ca21916 authored by Arnaud Bey's avatar Arnaud Bey
Browse files

misc. (grid generation cmd)

parent 0422a8e7
......@@ -34,6 +34,6 @@ application/web/upload/profilepic/*
application/web/files/*
!application/web/files/.gitkeep
application/data/lexicons/*
application/data/*.txt
application/data/*.csv
!application/data/lexicons/.gitkeep
*.php~
......@@ -48,8 +48,10 @@ class GenerateGridCommand extends ContainerAwareCommand
$minBigram = (!$minBigram) ? null : $minBigram;
// STATS
$globalFoundables = []; // stockage des foundables distincts
$foundablesCountByGrid = []; // stockage du nombre de mot par grille
$averageCombo = 0;
$totalLemma = 0;
$totalComboCounts = 0;
$globalLemmas = [];
while ($keptGrid < $number) {
$language = $em->getRepository('LexiconBundle:Language')->find($idLexicon);
......@@ -60,15 +62,21 @@ class GenerateGridCommand extends ContainerAwareCommand
$executionTime = round($timeEnd - $timeStart, 2);
$foundables = $grid->getFoundableForms();
$formCount = $foundables ? count($foundables) : 0;
$lemmasCombos = $gridManager->countLemmasAndCombos($grid);
// STATS
$foundablesCountByGrid[] = $formCount;
foreach ($foundables as $foundable) {
$form = $foundable->getForm();
$globalFoundables[$form] = (isset($globalFoundables[$form])) ? $globalFoundables[$form] + 1 : 1;
$totalFormCountAll += $formCount;
$averageCombo += $lemmasCombos["comboAverage"];
$totalLemma += $lemmasCombos["lemmaCount"];
$totalComboCounts += $lemmasCombos["comboCount"];
$lemmas = $lemmasCombos["lemmas"];
foreach ($lemmas as $lemma) {
$globalLemmas[] = $lemma['root']['id'];
}
$globalLemmas = array_unique($globalLemmas, SORT_NUMERIC);
$totalFormCountAll += $formCount;
$best = ($formCount > $best) ? $formCount : $best;
$worst = ($formCount < $worst) ? $formCount : $worst;
if ($threshold && $formCount < $threshold) {
......@@ -86,30 +94,42 @@ class GenerateGridCommand extends ContainerAwareCommand
$em->flush();
} else {
$keptGrid++;
$output->writeln('<info>('.$keptGrid.') A grid has been generated and contains '.$formCount.' forms (generated in '.$executionTime.' sec.)</info>');
$output->writeln('<info>('.$keptGrid.') A grid has been generated and contains '.$formCount.' forms from '.$lemmasCombos["lemmaCount"].' lemmas with '.$lemmasCombos["comboCount"].' combos (generated in '.$executionTime.' sec.)</info>');
$totalFormCount += $formCount;
}
$em->clear();
$allGrid++;
$averageCombos = round($averageCombo/ $allGrid, 2);
$averageLemmas = round($totalLemma/ $allGrid);
$averageAll = round($totalFormCountAll / $allGrid);
$countDistinctFoundables = count($globalFoundables);
$output->writeln('Average form count for all '.$allGrid.' grids : <error>'.$averageAll.'</error>');
$averageComboCounts = round($totalComboCounts / $allGrid);
$distinctLemmas = count($globalLemmas);
$output->writeln('Count distinct lemmas : <error>'.$distinctLemmas.'</error>');
$output->writeln('Average form count : <error>'.$averageAll.'</error>');
$output->writeln('Average Lemma count : <error>'.$averageLemmas.'</error>');
$output->writeln('Average combo count : <error>'.$averageComboCounts.'</error>');
$output->writeln('Average combo length : <error>'.$averageCombos.'</error>');
$output->writeln('Best grid : <error>'.$best.'</error>');
$output->writeln('Worst grid : <error>'.$worst.'</error>');
$output->writeln('Distinct foundables count : <error>'.$countDistinctFoundables.'</error>');
$output->writeln('');
}
$countDistinctFoundables = count($globalFoundables);
$average = round($totalFormCount / $keptGrid);
$output->writeln('<info>################### DONE ! ############################################</info>');
$output->writeln('Average form count: for '. $keptGrid .' kept grids : <error>'.$average.'</error>');
$output->writeln('<info>################### DONE ! ################################</info>');
//$output->writeln('Average form count: for '. $keptGrid .' kept grids : <error>'.$average.'</error>');
$averageAll = round($totalFormCountAll / $allGrid);
$output->writeln('Average form count for all '. $allGrid .' grids : <error>'.$averageAll.'</error>');
$output->writeln('Count distinct lemmas : <error>'.$distinctLemmas.'</error>');
$output->writeln('Average form count : <error>'.$averageAll.'</error>');
$output->writeln('Average lemma count : <error>'.$averageLemmas.'</error>');
$output->writeln('Average combo count : <error>'.$averageComboCounts.'</error>');
$output->writeln('Average combo length : <error>'.$averageCombos.'</error>');
$output->writeln('Best grid : <error>'.$best.'</error>');
$output->writeln('Worst grid : <error>'.$worst.'</error>');
$output->writeln('Nombre de formes différentes : <error>'.$countDistinctFoundables.'</error>');
$output->writeln('<info>#######################################################################</info>');
$output->writeln('<info>###########################################################</info>');
$file = "data/import-".$idLexicon.".csv";
$output = $minBigram.";".$number.";".$averageLemmas.";".$distinctLemmas.";".$averageComboCounts."\n";
file_put_contents($file, $output, FILE_APPEND);
}
}
......@@ -119,6 +119,20 @@ class GridManager
return $grid;
}
/**
 * Gather lemma and combo statistics for a grid.
 *
 * The words of the grid are fetched with retrieveInflections() and grouped
 * with getCombos(). Every group counts as a lemma; a group whose 'words'
 * list holds more than one element additionally counts as a combo.
 *
 * @param Grid $grid The grid to analyse.
 *
 * @return array Map with the keys:
 *               - "lemmaCount":   number of groups returned by getCombos()
 *               - "comboAverage": result of getComboAverage() on the combos
 *               - "comboCount":   number of groups with more than one word
 *               - "lemmas":       the groups themselves, as returned by getCombos()
 */
public function countLemmasAndCombos(Grid $grid)
{
    $groups = $this->getCombos($this->retrieveInflections($grid));

    // Keep only the groups that hold several words; original keys are kept.
    $multiWordGroups = [];
    foreach ($groups as $key => $group) {
        if (count($group['words']) > 1) {
            $multiWordGroups[$key] = $group;
        }
    }

    $stats = [];
    $stats["lemmaCount"] = count($groups);
    $stats["comboAverage"] = $this->getComboAverage($multiWordGroups);
    $stats["comboCount"] = count($multiWordGroups);
    $stats["lemmas"] = $groups;

    return $stats;
}
private function removeSquares(Grid $grid)
{
$squares = $grid->getSquares();
......@@ -167,6 +181,18 @@ class GridManager
return $grid->getFoundableForms();
}
/**
 * Compute the average number of words per combo.
 *
 * @param array $combos List of combos; each entry must expose a countable
 *                      "words" element.
 *
 * @return int|float Mean size of the "words" lists, or 0 when $combos is
 *                   empty (there is nothing to average).
 */
public function getComboAverage($combos)
{
    $length = count($combos);

    // Without this guard an empty list would divide by zero below.
    if ($length === 0) {
        return 0;
    }

    $totalCombo = 0;
    foreach ($combos as $combo) {
        $totalCombo += count($combo["words"]);
    }

    return $totalCombo / $length;
}
public function getCombos($words)
{
$combos = [];
......@@ -190,15 +216,15 @@ class GridManager
}
}
$combos = array_filter($combos, function ($v) {
return count($v['words']) > 1;
$lemmas = array_filter($combos, function ($v) {
return count($v['words']) > 0;
});
usort($combos, function ($a, $b) {
usort($lemmas, function ($a, $b) {
return count($b['words']) - count($a['words']);
});
return $combos;
return $lemmas;
}
public function findWords(Grid $grid)
......@@ -327,15 +353,15 @@ class GridManager
public function retrieveInflections(Grid $grid)
{
$inflections = [];
$words = [];
$foundables = $grid->getFoundableForms();
foreach ($foundables as $foundable) {
foreach ($foundable->getInflections() as $inflection) {
$inflections[] = $inflection;
foreach ($foundable->getWords() as $word) {
$words[] = $word;
}
}
return $inflections;
return $words;
}
public function export(Grid $grid = null)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment