Commit 9ca21916 authored by Arnaud Bey
Browse files

misc. (grid generation cmd)

parent 0422a8e7
...@@ -34,6 +34,6 @@ application/web/upload/profilepic/* ...@@ -34,6 +34,6 @@ application/web/upload/profilepic/*
application/web/files/* application/web/files/*
!aplication/web/files/.gitkeep !aplication/web/files/.gitkeep
application/data/lexicons/* application/data/lexicons/*
application/data/*.txt application/data/*.csv
!application/data/lexicons/.gitkeep !application/data/lexicons/.gitkeep
*.php~ *.php~
...@@ -48,8 +48,10 @@ class GenerateGridCommand extends ContainerAwareCommand ...@@ -48,8 +48,10 @@ class GenerateGridCommand extends ContainerAwareCommand
$minBigram = (!$minBigram) ? null : $minBigram; $minBigram = (!$minBigram) ? null : $minBigram;
// STATS // STATS
$globalFoundables = []; // stockage des foundables distincts $averageCombo = 0;
$foundablesCountByGrid = []; // stockage du nombre de mot par grille $totalLemma = 0;
$totalComboCounts = 0;
$globalLemmas = [];
while ($keptGrid < $number) { while ($keptGrid < $number) {
$language = $em->getRepository('LexiconBundle:Language')->find($idLexicon); $language = $em->getRepository('LexiconBundle:Language')->find($idLexicon);
...@@ -60,15 +62,21 @@ class GenerateGridCommand extends ContainerAwareCommand ...@@ -60,15 +62,21 @@ class GenerateGridCommand extends ContainerAwareCommand
$executionTime = round($timeEnd - $timeStart, 2); $executionTime = round($timeEnd - $timeStart, 2);
$foundables = $grid->getFoundableForms(); $foundables = $grid->getFoundableForms();
$formCount = $foundables ? count($foundables) : 0; $formCount = $foundables ? count($foundables) : 0;
$lemmasCombos = $gridManager->countLemmasAndCombos($grid);
// STATS // STATS
$foundablesCountByGrid[] = $formCount; $totalFormCountAll += $formCount;
foreach ($foundables as $foundable) { $averageCombo += $lemmasCombos["comboAverage"];
$form = $foundable->getForm(); $totalLemma += $lemmasCombos["lemmaCount"];
$globalFoundables[$form] = (isset($globalFoundables[$form])) ? $globalFoundables[$form] + 1 : 1; $totalComboCounts += $lemmasCombos["comboCount"];
$lemmas = $lemmasCombos["lemmas"];
foreach ($lemmas as $lemma) {
$globalLemmas[] = $lemma['root']['id'];
} }
$globalLemmas = array_unique($globalLemmas, SORT_NUMERIC);
$totalFormCountAll += $formCount;
$best = ($formCount > $best) ? $formCount : $best; $best = ($formCount > $best) ? $formCount : $best;
$worst = ($formCount < $worst) ? $formCount : $worst; $worst = ($formCount < $worst) ? $formCount : $worst;
if ($threshold && $formCount < $threshold) { if ($threshold && $formCount < $threshold) {
...@@ -86,30 +94,42 @@ class GenerateGridCommand extends ContainerAwareCommand ...@@ -86,30 +94,42 @@ class GenerateGridCommand extends ContainerAwareCommand
$em->flush(); $em->flush();
} else { } else {
$keptGrid++; $keptGrid++;
$output->writeln('<info>('.$keptGrid.') A grid has been generated and contains '.$formCount.' forms (generated in '.$executionTime.' sec.)</info>'); $output->writeln('<info>('.$keptGrid.') A grid has been generated and contains '.$formCount.' forms from '.$lemmasCombos["lemmaCount"].' lemmas with '.$lemmasCombos["comboCount"].' combos (generated in '.$executionTime.' sec.)</info>');
$totalFormCount += $formCount; $totalFormCount += $formCount;
} }
$em->clear(); $em->clear();
$allGrid++; $allGrid++;
$averageCombos = round($averageCombo/ $allGrid, 2);
$averageLemmas = round($totalLemma/ $allGrid);
$averageAll = round($totalFormCountAll / $allGrid); $averageAll = round($totalFormCountAll / $allGrid);
$countDistinctFoundables = count($globalFoundables); $averageComboCounts = round($totalComboCounts / $allGrid);
$output->writeln('Average form count for all '.$allGrid.' grids : <error>'.$averageAll.'</error>'); $distinctLemmas = count($globalLemmas);
$output->writeln('Count distinct lemmas : <error>'.$distinctLemmas.'</error>');
$output->writeln('Average form count : <error>'.$averageAll.'</error>');
$output->writeln('Average Lemma count : <error>'.$averageLemmas.'</error>');
$output->writeln('Average combo count : <error>'.$averageComboCounts.'</error>');
$output->writeln('Average combo length : <error>'.$averageCombos.'</error>');
$output->writeln('Best grid : <error>'.$best.'</error>'); $output->writeln('Best grid : <error>'.$best.'</error>');
$output->writeln('Worst grid : <error>'.$worst.'</error>'); $output->writeln('Worst grid : <error>'.$worst.'</error>');
$output->writeln('Distinct foundables count : <error>'.$countDistinctFoundables.'</error>');
$output->writeln(''); $output->writeln('');
} }
$countDistinctFoundables = count($globalFoundables);
$average = round($totalFormCount / $keptGrid); $average = round($totalFormCount / $keptGrid);
$output->writeln('<info>################### DONE ! ############################################</info>'); $output->writeln('<info>################### DONE ! ################################</info>');
$output->writeln('Average form count: for '. $keptGrid .' kept grids : <error>'.$average.'</error>'); //$output->writeln('Average form count: for '. $keptGrid .' kept grids : <error>'.$average.'</error>');
$averageAll = round($totalFormCountAll / $allGrid); $averageAll = round($totalFormCountAll / $allGrid);
$output->writeln('Average form count for all '. $allGrid .' grids : <error>'.$averageAll.'</error>'); $output->writeln('Count distinct lemmas : <error>'.$distinctLemmas.'</error>');
$output->writeln('Average form count : <error>'.$averageAll.'</error>');
$output->writeln('Average lemma count : <error>'.$averageLemmas.'</error>');
$output->writeln('Average combo count : <error>'.$averageComboCounts.'</error>');
$output->writeln('Average combo length : <error>'.$averageCombos.'</error>');
$output->writeln('Best grid : <error>'.$best.'</error>'); $output->writeln('Best grid : <error>'.$best.'</error>');
$output->writeln('Worst grid : <error>'.$worst.'</error>'); $output->writeln('Worst grid : <error>'.$worst.'</error>');
$output->writeln('Nombre de formes différentes : <error>'.$countDistinctFoundables.'</error>'); $output->writeln('<info>###########################################################</info>');
$output->writeln('<info>#######################################################################</info>');
$file = "data/import-".$idLexicon.".csv";
$output = $minBigram.";".$number.";".$averageLemmas.";".$distinctLemmas.";".$averageComboCounts."\n";
file_put_contents($file, $output, FILE_APPEND);
} }
} }
...@@ -119,6 +119,20 @@ class GridManager ...@@ -119,6 +119,20 @@ class GridManager
return $grid; return $grid;
} }
/**
 * Gathers lemma and combo statistics for a grid.
 *
 * The grid's words are fetched with retrieveInflections() and grouped with
 * getCombos(). Entries holding more than one word are counted as "combos".
 *
 * @param Grid $grid The grid to analyse.
 *
 * @return array With the keys:
 *               - "lemmaCount":   number of entries returned by getCombos()
 *               - "comboAverage": value returned by getComboAverage() for the combos
 *               - "comboCount":   number of entries holding more than one word
 *               - "lemmas":       the entries returned by getCombos()
 */
public function countLemmasAndCombos(Grid $grid)
{
    $lemmas = $this->getCombos($this->retrieveInflections($grid));
    $lemmaCount = count($lemmas);

    // Keep only the entries made of several words; keys are preserved.
    $multiWordLemmas = [];
    foreach ($lemmas as $key => $lemma) {
        if (count($lemma['words']) > 1) {
            $multiWordLemmas[$key] = $lemma;
        }
    }

    return [
        "lemmaCount" => $lemmaCount,
        "comboAverage" => $this->getComboAverage($multiWordLemmas),
        "comboCount" => count($multiWordLemmas),
        "lemmas" => $lemmas,
    ];
}
private function removeSquares(Grid $grid) private function removeSquares(Grid $grid)
{ {
$squares = $grid->getSquares(); $squares = $grid->getSquares();
...@@ -167,6 +181,18 @@ class GridManager ...@@ -167,6 +181,18 @@ class GridManager
return $grid->getFoundableForms(); return $grid->getFoundableForms();
} }
/**
 * Computes the average number of words per combo.
 *
 * @param array $combos List of combos; each entry must expose a countable "words" key.
 *
 * @return int|float The mean of count($combo["words"]) over all combos,
 *                   or 0 when there is no combo at all.
 */
public function getComboAverage($combos)
{
    // A grid without any combo would otherwise trigger a division by zero
    // (DivisionByZeroError on PHP 8, warning + NAN on PHP 7).
    if (empty($combos)) {
        return 0;
    }

    $totalCombo = 0;
    foreach ($combos as $combo) {
        $totalCombo += count($combo["words"]);
    }

    return $totalCombo / count($combos);
}
public function getCombos($words) public function getCombos($words)
{ {
$combos = []; $combos = [];
...@@ -190,15 +216,15 @@ class GridManager ...@@ -190,15 +216,15 @@ class GridManager
} }
} }
$combos = array_filter($combos, function ($v) { $lemmas = array_filter($combos, function ($v) {
return count($v['words']) > 1; return count($v['words']) > 0;
}); });
usort($combos, function ($a, $b) { usort($lemmas, function ($a, $b) {
return count($b['words']) - count($a['words']); return count($b['words']) - count($a['words']);
}); });
return $combos; return $lemmas;
} }
public function findWords(Grid $grid) public function findWords(Grid $grid)
...@@ -327,15 +353,15 @@ class GridManager ...@@ -327,15 +353,15 @@ class GridManager
public function retrieveInflections(Grid $grid) public function retrieveInflections(Grid $grid)
{ {
$inflections = []; $words = [];
$foundables = $grid->getFoundableForms(); $foundables = $grid->getFoundableForms();
foreach ($foundables as $foundable) { foreach ($foundables as $foundable) {
foreach ($foundable->getInflections() as $inflection) { foreach ($foundable->getWords() as $word) {
$inflections[] = $inflection; $words[] = $word;
} }
} }
return $inflections; return $words;
} }
public function export(Grid $grid = null) public function export(Grid $grid = null)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment