Commit 634944a2 authored by Arnaud Bey
Browse files

improve root perf

parent 8f4dbe5b
...@@ -5,9 +5,13 @@ namespace LexiconBundle\Entity; ...@@ -5,9 +5,13 @@ namespace LexiconBundle\Entity;
use Doctrine\Common\Collections\ArrayCollection; use Doctrine\Common\Collections\ArrayCollection;
use Doctrine\Common\Collections\Collection; use Doctrine\Common\Collections\Collection;
use Doctrine\ORM\Mapping as ORM; use Doctrine\ORM\Mapping as ORM;
use Doctrine\ORM\Mapping\Index;
/** /**
* @ORM\Table(name="lexicon_root") * @ORM\Table(name="lexicon_root", indexes={
* @Index(columns={"value"}, flags={"fulltext"}),
* @Index(name="language", columns={"language_id"}),
* })
* @ORM\Entity(repositoryClass="LexiconBundle\Repository\RootRepository") * @ORM\Entity(repositoryClass="LexiconBundle\Repository\RootRepository")
*/ */
class Root class Root
......
...@@ -143,20 +143,32 @@ class ImportManager ...@@ -143,20 +143,32 @@ class ImportManager
$cpt = 0; $cpt = 0;
$maxToFlush = 5000; $maxToFlush = 5000;
$total = count(file($pathFileLexicon)); $total = count(file($pathFileLexicon));
$stopwatch = new Stopwatch();
$bigrams=[]; $bigrams=[];
$stopwatchName = uniqid(); $stopwatchName = uniqid();
$stopwatch = new Stopwatch(); /*
$rootTime = "root-".uniqid();
$featureTime = "feature-".uniqid();
$startTime = "start-".uniqid();
*/
$stopwatch->start("global_import");
$stopwatch->start("global_import");
$stopwatch->start($stopwatchName); $stopwatch->start($stopwatchName);
$handle = @fopen($pathFileLexicon, "r"); $handle = @fopen($pathFileLexicon, "r");
if ($handle) { if ($handle) {
while (($line = fgets($handle, 4096)) !== false) { while (($line = fgets($handle, 4096)) !== false) {
if ($flushCpt !== 0) { if ($flushCpt !== 0) {
if (preg_match_all("/^([^\t]+)\t([^\t]+)\t([^\t]+)\s*$/", $line, $matches)) { if (preg_match_all("/^([^\t]+)\t([^\t]+)\t([^\t]+)\s*$/", $line, $matches)) {
$uid1 = uniqid();
$uid2 = uniqid();
$iterationTime = "iteration-".$uid1.$uid2;
$stopwatch->start($iterationTime);
$wordValue = $matches[1][0]; $wordValue = $matches[1][0];
$rootValue = $matches[2][0]; $rootValue = $matches[2][0];
#mb_eregi_replace ? #mb_eregi_replace ?
...@@ -167,18 +179,26 @@ class ImportManager ...@@ -167,18 +179,26 @@ class ImportManager
//$cleanWordValue = preg_replace("/\P{L}/", "", $cleanWordValue);#bug pour le russe //$cleanWordValue = preg_replace("/\P{L}/", "", $cleanWordValue);#bug pour le russe
$cleanWordValue = mb_eregi_replace("/\P{L}/", "", $cleanWordValue);#fonctionne pour le russe $cleanWordValue = mb_eregi_replace("/\P{L}/", "", $cleanWordValue);#fonctionne pour le russe
$string2print.=" / EREGI = ".$cleanWordValue."\n"; $string2print.=" / EREGI = ".$cleanWordValue."\n";
// Gestion de la root
// ROOT
//$stopwatch->start($rootTime);
$root = $this->rm->findOrCreate($language, $rootValue, $roots); $root = $this->rm->findOrCreate($language, $rootValue, $roots);
//$stopwatch->stop($rootTime);
// Gestion des features // FEATURES
//$stopwatch->start($featureTime);
$labelsNValues = explode(",", $matches[3][0]); $labelsNValues = explode(",", $matches[3][0]);
$features = []; $features = [];
foreach ($labelsNValues as $labelNValue) { foreach ($labelsNValues as $labelNValue) {
$featureStringTab = explode("=", $labelNValue); $featureStringTab = explode("=", $labelNValue);
$features[] = $this->fm->findOrCreate($language, $featureStringTab[0], $featureStringTab[1], $featuresToFlush); $features[] = $this->fm->findOrCreate($language, $featureStringTab[0], $featureStringTab[1], $featuresToFlush);
} }
//$stopwatch->stop($featureTime);
// WORD
$this->wm->create($language, $root, $features, $wordValue, $cleanWordValue); $this->wm->create($language, $root, $features, $wordValue, $cleanWordValue);
//$stopwatch->start($startTime);
// Gestion des lettres et débuts de mots // Gestion des lettres et débuts de mots
$wordsLetters = preg_split('//u', $cleanWordValue, null, PREG_SPLIT_NO_EMPTY); $wordsLetters = preg_split('//u', $cleanWordValue, null, PREG_SPLIT_NO_EMPTY);
$wordStartString = ""; $wordStartString = "";
...@@ -198,14 +218,21 @@ class ImportManager ...@@ -198,14 +218,21 @@ class ImportManager
} }
$previousLetter = $wordLetter; $previousLetter = $wordLetter;
} }
//$stopwatch->stop($startTime);
if ($flushCpt == $maxToFlush) { if ($flushCpt == $maxToFlush) {
$flushTime = "start-".$uid1.$uid2;
$stopwatch->start($flushTime);
$this->wm->createStarts($language, $wordStarts); $this->wm->createStarts($language, $wordStarts);
unset($wordStarts);
$wordStarts = null; $wordStarts = null;
$wordStarts = []; $wordStarts = [];
$this->flushAndFreeMemory(); $this->flushAndFreeMemory();
$flushCpt = 1; $flushCpt = 1;
unset($roots);
$roots = null; $roots = null;
$roots = []; $roots = [];
unset($featuresToFlush);
$featuresToFlush = null; $featuresToFlush = null;
$featuresToFlush = []; $featuresToFlush = [];
$languageId = $specs["language_id"]; $languageId = $specs["language_id"];
...@@ -213,9 +240,30 @@ class ImportManager ...@@ -213,9 +240,30 @@ class ImportManager
$percent = round($cpt / $total * 100, 2); $percent = round($cpt / $total * 100, 2);
echo("[".$percent."%] ".$wordValue."\n"); echo("[".$percent."%] ".$wordValue."\n");
$event = $stopwatch->stop($stopwatchName); $event = $stopwatch->stop($stopwatchName);
$stopwatchName = uniqid();
echo "max memory > " . $event->getMemory()/1048576 . " MB \n"; echo "max memory > " . $event->getMemory()/1048576 . " MB \n";
echo "duration > " . $event->getDuration()/1000 . " seconds \n\n"; echo "duration > " . $event->getDuration()/1000 . " seconds \n\n";
/*
$event = $stopwatch->stop($flushTime);
echo "FLUSH duration > " . $event->getDuration()/1000 . " seconds \n";
$event = $stopwatch->start($rootTime);
$event = $stopwatch->stop($rootTime);
echo "ROOT duration > " . $event->getDuration()/1000 . " seconds \n";
$event = $stopwatch->start($featureTime);
$event = $stopwatch->stop($featureTime);
echo "FEATURE duration > " . $event->getDuration()/1000 . " seconds \n";
$event = $stopwatch->start($startTime);
$event = $stopwatch->stop($startTime);
echo "START duration > " . $event->getDuration()/1000 . " seconds \n\n";
$rootTime = "root-".uniqid();
$featureTime = "feature-".uniqid();
$startTime = "start-".uniqid();
*/
$stopwatchName = uniqid();
$stopwatch->start($stopwatchName); $stopwatch->start($stopwatchName);
} }
} }
......
...@@ -27,13 +27,17 @@ class RootManager ...@@ -27,13 +27,17 @@ class RootManager
public function findOrCreate(Language $language, $value, &$roots) public function findOrCreate(Language $language, $value, &$roots)
{ {
if (!isset($roots[$value])) { if (!isset($roots[$value])) {
if (!$root = $this->em->getRepository(Root::class)->findOneBy(['value' => $value, 'language' => $language])) { $rootId = $this->em->getRepository(Root::class)->search($value, $language->getId());
if (empty($rootId)) {
$root = new Root; $root = new Root;
$root->setLanguage($language); $root->setLanguage($language);
$root->setValue($value); $root->setValue($value);
$this->em->persist($root); $this->em->persist($root);
$roots[$value] = $root; $roots[$value] = $root;
} else {
$rootId = $rootId[0]["id"];
$root = $this->em->getRepository(Root::class)->find($rootId);
} }
return $root; return $root;
......
...@@ -37,4 +37,15 @@ class RootRepository extends \Doctrine\ORM\EntityRepository ...@@ -37,4 +37,15 @@ class RootRepository extends \Doctrine\ORM\EntityRepository
return $query->getResult(); return $query->getResult();
} }
/**
 * Looks up the id of a root by full-text match on its value, restricted to one language.
 *
 * Runs a native MySQL full-text query (MATCH ... AGAINST in natural language mode)
 * against the lexicon_root table and returns at most one row.
 *
 * Both inputs are passed as bound parameters instead of being concatenated into
 * the SQL string: addslashes() is not a safe SQL escaping mechanism, and the
 * language id was previously interpolated without any escaping at all.
 *
 * NOTE(review): a natural-language full-text match is not an exact equality test;
 * it may match a root that merely shares a token with $value, and may miss very
 * short values or stopwords. Confirm this is acceptable for callers that expect
 * exact-value lookup semantics.
 *
 * @param string     $value      Root value to search for.
 * @param int|string $languageId Identifier of the language the root must belong to.
 *
 * @return array Result rows as returned by the statement's fetchAll(); each row holds
 *               an "id" entry. Empty when nothing matches.
 */
public function search($value, $languageId)
{
    $sql = "SELECT id FROM lexicon_root"
        . " WHERE MATCH(value) AGAINST (:value IN NATURAL LANGUAGE MODE)"
        . " AND language_id = :languageId"
        . " LIMIT 1";

    $stmt = $this->_em->getConnection()->prepare($sql);
    // Bind instead of interpolating so the driver handles quoting and typing.
    $stmt->bindValue('value', $value);
    $stmt->bindValue('languageId', (int) $languageId, \PDO::PARAM_INT);
    $stmt->execute();

    return $stmt->fetchAll();
}
} }
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment