Vous avez reçu un message "Your GitLab account has been locked ..." ? Pas d'inquiétude : lisez cet article https://docs.gricad-pages.univ-grenoble-alpes.fr/help/unlock/

Commit 634944a2 authored by Arnaud Bey
Browse files

improve root perf

parent 8f4dbe5b
......@@ -5,9 +5,13 @@ namespace LexiconBundle\Entity;
use Doctrine\Common\Collections\ArrayCollection;
use Doctrine\Common\Collections\Collection;
use Doctrine\ORM\Mapping as ORM;
use Doctrine\ORM\Mapping\Index;
/**
* @ORM\Table(name="lexicon_root")
* @ORM\Table(name="lexicon_root", indexes={
* @Index(columns={"value"}, flags={"fulltext"}),
* @Index(name="language", columns={"language_id"}),
* })
* @ORM\Entity(repositoryClass="LexiconBundle\Repository\RootRepository")
*/
class Root
......
......@@ -143,20 +143,32 @@ class ImportManager
$cpt = 0;
$maxToFlush = 5000;
$total = count(file($pathFileLexicon));
$stopwatch = new Stopwatch();
$bigrams=[];
$stopwatchName = uniqid();
$stopwatch = new Stopwatch();
/*
$rootTime = "root-".uniqid();
$featureTime = "feature-".uniqid();
$startTime = "start-".uniqid();
*/
$stopwatch->start("global_import");
$stopwatch->start("global_import");
$stopwatch->start($stopwatchName);
$handle = @fopen($pathFileLexicon, "r");
if ($handle) {
while (($line = fgets($handle, 4096)) !== false) {
if ($flushCpt !== 0) {
if (preg_match_all("/^([^\t]+)\t([^\t]+)\t([^\t]+)\s*$/", $line, $matches)) {
$uid1 = uniqid();
$uid2 = uniqid();
$iterationTime = "iteration-".$uid1.$uid2;
$stopwatch->start($iterationTime);
$wordValue = $matches[1][0];
$rootValue = $matches[2][0];
#mb_eregi_replace ?
......@@ -167,18 +179,26 @@ class ImportManager
//$cleanWordValue = preg_replace("/\P{L}/", "", $cleanWordValue);#bug pour le russe
$cleanWordValue = mb_eregi_replace("/\P{L}/", "", $cleanWordValue);#fonctionne pour le russe
$string2print.=" / EREGI = ".$cleanWordValue."\n";
// Gestion de la root
// ROOT
//$stopwatch->start($rootTime);
$root = $this->rm->findOrCreate($language, $rootValue, $roots);
//$stopwatch->stop($rootTime);
// Gestion des features
// FEATURES
//$stopwatch->start($featureTime);
$labelsNValues = explode(",", $matches[3][0]);
$features = [];
foreach ($labelsNValues as $labelNValue) {
$featureStringTab = explode("=", $labelNValue);
$features[] = $this->fm->findOrCreate($language, $featureStringTab[0], $featureStringTab[1], $featuresToFlush);
}
//$stopwatch->stop($featureTime);
// WORD
$this->wm->create($language, $root, $features, $wordValue, $cleanWordValue);
//$stopwatch->start($startTime);
// Gestion des lettres et débuts de mots
$wordsLetters = preg_split('//u', $cleanWordValue, null, PREG_SPLIT_NO_EMPTY);
$wordStartString = "";
......@@ -198,14 +218,21 @@ class ImportManager
}
$previousLetter = $wordLetter;
}
//$stopwatch->stop($startTime);
if ($flushCpt == $maxToFlush) {
$flushTime = "start-".$uid1.$uid2;
$stopwatch->start($flushTime);
$this->wm->createStarts($language, $wordStarts);
unset($wordStarts);
$wordStarts = null;
$wordStarts = [];
$this->flushAndFreeMemory();
$flushCpt = 1;
unset($roots);
$roots = null;
$roots = [];
unset($featuresToFlush);
$featuresToFlush = null;
$featuresToFlush = [];
$languageId = $specs["language_id"];
......@@ -213,9 +240,30 @@ class ImportManager
$percent = round($cpt / $total * 100, 2);
echo("[".$percent."%] ".$wordValue."\n");
$event = $stopwatch->stop($stopwatchName);
$stopwatchName = uniqid();
echo "max memory > " . $event->getMemory()/1048576 . " MB \n";
echo "duration > " . $event->getDuration()/1000 . " seconds \n\n";
/*
$event = $stopwatch->stop($flushTime);
echo "FLUSH duration > " . $event->getDuration()/1000 . " seconds \n";
$event = $stopwatch->start($rootTime);
$event = $stopwatch->stop($rootTime);
echo "ROOT duration > " . $event->getDuration()/1000 . " seconds \n";
$event = $stopwatch->start($featureTime);
$event = $stopwatch->stop($featureTime);
echo "FEATURE duration > " . $event->getDuration()/1000 . " seconds \n";
$event = $stopwatch->start($startTime);
$event = $stopwatch->stop($startTime);
echo "START duration > " . $event->getDuration()/1000 . " seconds \n\n";
$rootTime = "root-".uniqid();
$featureTime = "feature-".uniqid();
$startTime = "start-".uniqid();
*/
$stopwatchName = uniqid();
$stopwatch->start($stopwatchName);
}
}
......
......@@ -27,13 +27,17 @@ class RootManager
public function findOrCreate(Language $language, $value, &$roots)
{
if (!isset($roots[$value])) {
if (!$root = $this->em->getRepository(Root::class)->findOneBy(['value' => $value, 'language' => $language])) {
$rootId = $this->em->getRepository(Root::class)->search($value, $language->getId());
if (empty($rootId)) {
$root = new Root;
$root->setLanguage($language);
$root->setValue($value);
$this->em->persist($root);
$roots[$value] = $root;
} else {
$rootId = $rootId[0]["id"];
$root = $this->em->getRepository(Root::class)->find($rootId);
}
return $root;
......
......@@ -37,4 +37,15 @@ class RootRepository extends \Doctrine\ORM\EntityRepository
return $query->getResult();
}
/**
 * Looks up the id of a root by full-text match on its value, restricted to one language.
 *
 * Runs a native SQL query against the `lexicon_root` table using
 * MATCH ... AGAINST in natural language mode and returns at most one row.
 *
 * Both inputs are passed as bound parameters rather than being concatenated
 * into the SQL text, so quoting is handled by the database driver.
 *
 * NOTE(review): a natural-language full-text match is not an exact equality
 * test; it may presumably match rows whose value merely contains the searched
 * term, or miss very short terms depending on server settings — confirm this
 * is acceptable for callers that treat the result as "the" root for $value.
 *
 * @param string     $value      Root value to search for.
 * @param int|string $languageId Identifier of the language (compared to `language_id`).
 *
 * @return array Result rows as returned by fetchAll(); empty when nothing
 *               matches, otherwise a single row containing the `id` column.
 */
public function search($value, $languageId)
{
    $sql = "SELECT id FROM lexicon_root"
        ." WHERE MATCH(value) AGAINST (:value IN NATURAL LANGUAGE MODE)"
        ." AND language_id = :languageId"
        ." LIMIT 1";

    $stmt = $this->_em->getConnection()->prepare($sql);
    // Bind instead of interpolating: addslashes() is not a safe SQL escaper.
    $stmt->bindValue('value', (string) $value);
    $stmt->bindValue('languageId', (int) $languageId);
    $stmt->execute();

    return $stmt->fetchAll();
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment