Vous avez reçu un message "Your GitLab account has been locked ..." ? Pas d'inquiétude : lisez cet article https://docs.gricad-pages.univ-grenoble-alpes.fr/help/unlock/

Commit ca6c2731 authored by Arnaud Bey
Browse files

WIP stats diversité grilles

parent aa3fce9e
...@@ -34,5 +34,6 @@ application/web/upload/profilepic/* ...@@ -34,5 +34,6 @@ application/web/upload/profilepic/*
application/web/files/* application/web/files/*
!aplication/web/files/.gitkeep !aplication/web/files/.gitkeep
application/data/lexicons/* application/data/lexicons/*
application/data/*.txt
!application/data/lexicons/.gitkeep !application/data/lexicons/.gitkeep
*.php~ *.php~
...@@ -8,6 +8,7 @@ use Symfony\Component\Console\Input\InputArgument; ...@@ -8,6 +8,7 @@ use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Question\Question; use Symfony\Component\Console\Question\Question;
use Symfony\Component\Filesystem\Filesystem;
class GenerateGridCommand extends ContainerAwareCommand class GenerateGridCommand extends ContainerAwareCommand
{ {
...@@ -46,6 +47,10 @@ class GenerateGridCommand extends ContainerAwareCommand ...@@ -46,6 +47,10 @@ class GenerateGridCommand extends ContainerAwareCommand
$worst = 1000; $worst = 1000;
$minBigram = (!$minBigram) ? null : $minBigram; $minBigram = (!$minBigram) ? null : $minBigram;
// STATS
$globalFoundables = []; // stockage des foundables distincts
$foundablesCountByGrid = []; // stockage du nombre de mot par grille
while ($keptGrid < $number) { while ($keptGrid < $number) {
$language = $em->getRepository('LexiconBundle:Language')->find($idLexicon); $language = $em->getRepository('LexiconBundle:Language')->find($idLexicon);
$languageName = $language->getValue(); $languageName = $language->getValue();
...@@ -53,7 +58,16 @@ class GenerateGridCommand extends ContainerAwareCommand ...@@ -53,7 +58,16 @@ class GenerateGridCommand extends ContainerAwareCommand
$grid = $gridManager->generate($language, true, $minBigram); $grid = $gridManager->generate($language, true, $minBigram);
$timeEnd = microtime(true); $timeEnd = microtime(true);
$executionTime = round($timeEnd - $timeStart, 2); $executionTime = round($timeEnd - $timeStart, 2);
$formCount = $grid->getFoundableForms()? count($grid->getFoundableForms()) : 0; $foundables = $grid->getFoundableForms();
$formCount = $foundables ? count($foundables) : 0;
// STATS
$foundablesCountByGrid[] = $formCount;
foreach ($foundables as $foundable) {
$form = $foundable->getForm();
$globalFoundables[$form] = (isset($globalFoundables[$form])) ? $globalFoundables[$form] + 1 : 1;
}
$totalFormCountAll += $formCount; $totalFormCountAll += $formCount;
$best = ($formCount > $best) ? $formCount : $best; $best = ($formCount > $best) ? $formCount : $best;
$worst = ($formCount < $worst) ? $formCount : $worst; $worst = ($formCount < $worst) ? $formCount : $worst;
...@@ -79,12 +93,15 @@ class GenerateGridCommand extends ContainerAwareCommand ...@@ -79,12 +93,15 @@ class GenerateGridCommand extends ContainerAwareCommand
$em->clear(); $em->clear();
$allGrid++; $allGrid++;
$averageAll = round($totalFormCountAll / $allGrid); $averageAll = round($totalFormCountAll / $allGrid);
$countDistinctFoundables = count($globalFoundables);
$output->writeln('Average form count for all '.$allGrid.' grids : <error>'.$averageAll.'</error>'); $output->writeln('Average form count for all '.$allGrid.' grids : <error>'.$averageAll.'</error>');
$output->writeln('Best grid : <error>'.$best.'</error>'); $output->writeln('Best grid : <error>'.$best.'</error>');
$output->writeln('Worst grid : <error>'.$worst.'</error>'); $output->writeln('Worst grid : <error>'.$worst.'</error>');
$output->writeln('Distinct foundables count : <error>'.$countDistinctFoundables.'</error>');
$output->writeln(''); $output->writeln('');
} }
$countDistinctFoundables = count($globalFoundables);
$average = round($totalFormCount / $keptGrid); $average = round($totalFormCount / $keptGrid);
$output->writeln('<info>################### DONE ! ############################################</info>'); $output->writeln('<info>################### DONE ! ############################################</info>');
$output->writeln('Average form count: for '. $keptGrid .' kept grids : <error>'.$average.'</error>'); $output->writeln('Average form count: for '. $keptGrid .' kept grids : <error>'.$average.'</error>');
...@@ -92,6 +109,17 @@ class GenerateGridCommand extends ContainerAwareCommand ...@@ -92,6 +109,17 @@ class GenerateGridCommand extends ContainerAwareCommand
$output->writeln('Average form count for all '. $allGrid .' grids : <error>'.$averageAll.'</error>'); $output->writeln('Average form count for all '. $allGrid .' grids : <error>'.$averageAll.'</error>');
$output->writeln('Best grid : <error>'.$best.'</error>'); $output->writeln('Best grid : <error>'.$best.'</error>');
$output->writeln('Worst grid : <error>'.$worst.'</error>'); $output->writeln('Worst grid : <error>'.$worst.'</error>');
$output->writeln('Nombre de formes différentes : <error>'.$countDistinctFoundables.'</error>');
$output->writeln('<info>#######################################################################</info>'); $output->writeln('<info>#######################################################################</info>');
//STATS
$filesystem = new Filesystem();
$filename = "data/import-".date("Y-m-d")."-".time()."-".uniqid().".txt";
arsort($globalFoundables);
file_put_contents($filename, "count\t".count($globalFoundables)."\n", FILE_APPEND);
foreach ($globalFoundables as $key => $value) {
file_put_contents($filename, $key."\t".$value."\n", FILE_APPEND);
}
} }
} }
...@@ -361,27 +361,46 @@ class GridManager ...@@ -361,27 +361,46 @@ class GridManager
$minBigram = ($minBigram == null) ? $language->getMinBigram() : $minBigram; $minBigram = ($minBigram == null) ? $language->getMinBigram() : $minBigram;
// On crée un tableau de bigrammes // On crée un tableau de bigrammes
// où chaque bigramme apparait autant de fois que son poids // où chaque bigramme apparait autant de fois que son poids
$bigrams = []; // Modif: on garde les $minBigram pourcent les plus productifs des bigrammes
// Génération d'un tableau qui a chaque bigram associe le % de bigram moins fréquents (centile)
$centilesBigrams = [];
$poidsBigrams = [];
$bigramsToFreq = [];
$nbBigrams=0;
$lines = $this->languageManager->getBigrams($language); $lines = $this->languageManager->getBigrams($language);
$lettersFromBigrams=[];
array_shift($lines); array_shift($lines);
foreach ($lines as $line) { foreach ($lines as $line) {
$tab = explode("\t", $line); $tab = explode("\t", $line);
$string = $tab[0]; $string = $tab[0];
$freqBigramBi = $tab[1]; $freqBigramBi = $tab[1];
$poids = $tab[3]; $poids = $tab[3];
//si Freq du bigram et son inverse est < à min défini par user alors on le prend pas en compte $nbBigrams++;
if ($freqBigramBi > $minBigram) { $bigramsToFreq[$string]=$freqBigramBi;
$bigramLetters = preg_split('//u', $string, null, PREG_SPLIT_NO_EMPTY); $poidsBigrams[$string]=$freqBigramBi;
}
asort($bigramsToFreq);
$nbInferiorBigram=0;
$bigrams = [];
// $lines = $this->languageManager->getBigrams($language);
$lettersFromBigrams=[];
// array_shift($lines);
foreach ($bigramsToFreq as $bigramToFreq => $value) {
$nbInferiorBigram++;
$centile = 100 - intval(($nbBigrams-$nbInferiorBigram)*100/$nbBigrams);
$centilesBigrams[$bigramToFreq]=$centile;
//file_put_contents("data/log_bigram.txt", $bigramToFreq."\t".$centile."\n", FILE_APPEND);
if ($centilesBigrams[$bigramToFreq] >= $minBigram) {
$bigramLetters = preg_split('//u', $bigramToFreq, null, PREG_SPLIT_NO_EMPTY);
foreach ($bigramLetters as $bigramLetter) { foreach ($bigramLetters as $bigramLetter) {
$lettersFromBigrams[$bigramLetter] = 1; $lettersFromBigrams[$bigramLetter] = 1;
} }
for ($j = 0; $j < $poids; $j++) { for ($j = 0; $j < $poidsBigrams[$bigramToFreq]; $j++) {
$bigrams[] = $string; $bigrams[] = $bigramToFreq;
} }
} }
} }
$countsBigram = array_count_values($bigrams); $countsBigram = array_count_values($bigrams);
// var_dump($countsBigram); // var_dump($countsBigram);
// die(); // die();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment