Commit 15094b3c authored by arnaudbey's avatar arnaudbey
Browse files

retrieve cmds from magicword bundle

parent a7d439e8
<?php
namespace Innova\LexiconBundle\Command;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use MagicWordBundle\Entity\Lexicon\Lemma;
use MagicWordBundle\Entity\Lexicon\Inflection;
/**
 * Console command "lexicon:import-english".
 *
 * Walks the rows returned by the Inflection repository's getEnglishForms()
 * and, for each of them, makes sure a matching Lemma exists and creates one
 * Inflection per feature code found in the row's "features" string.
 */
class EnglishImportCommand extends ContainerAwareCommand
{
    /**
     * Primary key of the Language row attached to every lemma and inflection
     * created by this command.
     *
     * NOTE(review): presumably the English row of the language table — confirm.
     */
    const LANGUAGE_ID = 2;

    /**
     * Registers the command name and description.
     */
    protected function configure()
    {
        $this
            ->setName('lexicon:import-english')
            ->setDescription('import english')
        ;
    }

    /**
     * Imports every English form: one lemma lookup/creation per row, then one
     * inflection lookup/creation per ":xyz" feature code of that row.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return int Exit code (always 0)
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $em = $this->getDefaultEntityManager();
        $enForms = $em->getRepository('InnovaLexiconBundle:Inflection')->getEnglishForms();

        foreach ($enForms as $enForm) {
            $lemma = $this->handleLemma($enForm, $output);
            if (!$lemma) {
                continue;
            }

            // Each feature code is a colon followed by one or more non-colon
            // characters; the leading colon is kept in the match and is simply
            // not recognised by any lookup table in handleInflection().
            $matched = preg_match_all('/:[^:]+/', $enForm['features'], $matches);
            if ($matched) {
                foreach ($matches[0] as $featureCode) {
                    $this->handleInflection($featureCode, $lemma, $enForm['form'], $output);
                }
            }

            // Detach everything loaded for this row before moving to the next one.
            $em->clear();
        }

        return 0;
    }

    /**
     * Creates the inflection described by one feature code unless an
     * inflection with the same lemma and resolved features already exists.
     *
     * The code is read one character at a time: m/f/n select a gender, s/p a
     * number, 1/2/3 a person, and W/P/K/I/G a mood (with a tense for P/K/I/G).
     * C and S are listed in the tables but mapped to null, so they resolve to
     * neither tense nor mood.
     *
     * @param string          $subcat Feature code as matched in execute(), e.g. ":P3s"
     * @param object          $lemma  Lemma entity the inflection belongs to
     * @param string          $form   Surface form stored as the inflection content
     * @param OutputInterface $output Receives one "insert"/"ignore" line
     */
    private function handleInflection($subcat, $lemma, $form, $output)
    {
        $em = $this->getDefaultEntityManager();
        $inflectionManager = $this->getContainer()->get('innova_lexicon_inflection');

        $genderNames = [
            'm' => 'masculine',
            'f' => 'feminine',
            'n' => 'neutral',
        ];
        $numberNames = [
            's' => 'singular',
            'p' => 'plural',
        ];
        $personNames = [
            '1' => 'firstPerson',
            '2' => 'secondPerson',
            '3' => 'thirdPerson',
        ];
        $tenseNames = [
            'W' => null,
            'P' => 'present',
            'K' => 'past',
            'I' => 'simplePast',
            'G' => 'present',
            'C' => null,
            'S' => null,
        ];
        $moodNames = [
            'W' => 'infinitive',
            'P' => 'indicative',
            'K' => 'participle',
            'I' => 'indicative',
            'G' => 'participle',
            'C' => null,
            'S' => null,
        ];

        $gender = null;
        $number = null;
        $person = null;
        $tense = null;
        $mood = null;

        foreach (str_split($subcat) as $code) {
            // isset() is deliberate: entries mapped to null must not trigger a lookup.
            if (isset($genderNames[$code])) {
                $gender = $em->getRepository('InnovaLexiconBundle:Gender')->findOneByValue($genderNames[$code]);
            } elseif (isset($numberNames[$code])) {
                $number = $em->getRepository('InnovaLexiconBundle:Number')->findOneByValue($numberNames[$code]);
            } elseif (isset($personNames[$code])) {
                $person = $em->getRepository('InnovaLexiconBundle:Person')->findOneByValue($personNames[$code]);
            } else {
                // A single letter can carry both a tense and a mood.
                if (isset($tenseNames[$code])) {
                    $tense = $em->getRepository('InnovaLexiconBundle:Tense')->findOneByValue($tenseNames[$code]);
                }
                if (isset($moodNames[$code])) {
                    $mood = $em->getRepository('InnovaLexiconBundle:Mood')->findOneByValue($moodNames[$code]);
                }
            }
        }

        // array_filter() drops the features that stayed null, so they do not
        // constrain the lookup at all.
        // NOTE(review): when no feature resolves, the lookup is on the lemma
        // alone and any existing inflection of that lemma makes this form be
        // ignored; the form itself is never part of the criteria. Confirm this
        // de-duplication rule is the intended one.
        $criteria = array_filter([
            'lemma' => $lemma,
            'gender' => $gender,
            'number' => $number,
            'person' => $person,
            'tense' => $tense,
            'mood' => $mood,
        ]);

        $existing = $em->getRepository('InnovaLexiconBundle:Inflection')->findOneBy($criteria);
        if ($existing) {
            $output->writeln('<info> ignore form:'.$form.'</info>');

            return;
        }

        // The language is only needed when a new row is actually created.
        $language = $em->getRepository('InnovaLexiconBundle:Language')->find(self::LANGUAGE_ID);

        $inflection = new Inflection();
        $inflection->setLemma($lemma);
        $inflection->setGender($gender);
        $inflection->setNumber($number);
        $inflection->setPerson($person);
        $inflection->setTense($tense);
        $inflection->setMood($mood);
        $inflection->setLanguage($language);
        $inflection->setContent($form);
        $inflection->setPhonetic1('');
        $inflection->setPhonetic2('');
        $inflection->setCleanedContent($inflectionManager->getCleanContent($form));

        $em->persist($inflection);
        $em->flush();

        $output->writeln('<info> insert form:'.$form.'</info>');
    }

    /**
     * Finds the lemma described by an imported row, creating it when its
     * category is known and no matching lemma exists yet.
     *
     * @param array           $enForm Imported row; the "lemma" and "lemtype" keys are read
     * @param OutputInterface $output Receives one "insert"/"ignore" line
     *
     * @return object|null The existing or newly created lemma, or null when
     *                     none was found and none could be created
     */
    private function handleLemma($enForm, $output)
    {
        $em = $this->getDefaultEntityManager();

        // Source lemma type => Category value (null: no category for this type).
        $categoryNames = [
            'noun' => 'CommonNoun',
            'verb' => 'verb',
            'adjective' => 'adjective',
            'interjection' => 'interjection',
            'adverb' => 'adverb',
            'preposition' => 'preposition',
            'preposition article' => 'preposition',
            'preposition determinant' => 'preposition',
            'determiner' => 'determiner',
            'external' => null,
            'conjunction' => 'conjunction',
            'pronoun' => 'pronoun',
            'prefix' => null,
            'verbal adjective' => 'adjective',
            'nominal adjective' => 'adjective',
            'subordinating conjunction' => 'conjunction',
        ];
        // Source lemma type => Subcategory value, for the few types that have one.
        $subcategoryNames = [
            'preposition determinant' => 'determinant',
            'subordinating conjunction' => 'subordination',
        ];

        $lemmaContent = $enForm['lemma'];
        $lemmaType = $enForm['lemtype'];

        // A lemma type missing from the table is treated like one mapped to
        // null instead of raising an undefined-index notice.
        $categoryName = isset($categoryNames[$lemmaType]) ? $categoryNames[$lemmaType] : null;
        $subcategoryName = isset($subcategoryNames[$lemmaType]) ? $subcategoryNames[$lemmaType] : null;

        $category = $em->getRepository('InnovaLexiconBundle:Category')->findOneByValue($categoryName);
        $subcategory = $em->getRepository('InnovaLexiconBundle:Subcategory')->findOneByValue($subcategoryName);
        $language = $em->getRepository('InnovaLexiconBundle:Language')->find(self::LANGUAGE_ID);

        $lemma = $em->getRepository('InnovaLexiconBundle:Lemma')->findOneBy([
            'language' => $language,
            'content' => $lemmaContent,
            'category' => $category,
            'subcategory' => $subcategory,
        ]);

        if ($category && !$lemma) {
            $lemma = new Lemma();
            $lemma->setLanguage($language);
            $lemma->setContent($lemmaContent);
            $lemma->setCategory($category);
            $lemma->setLocution(0);
            $lemma->setPhonetic1('');
            $lemma->setPhonetic2('');
            $lemma->setSubcategory($subcategory);

            $em->persist($lemma);
            $em->flush();

            $output->writeln('<info> insert lemma: '.$lemmaContent.'</info>');
        } else {
            $output->writeln('<info> ignore lemma: '.$lemmaContent.'</info>');
        }

        return $lemma;
    }

    /**
     * Returns the "default" entity manager from the doctrine service.
     *
     * @return object
     */
    private function getDefaultEntityManager()
    {
        return $this->getContainer()->get('doctrine')->getEntityManager('default');
    }
}
<?php
namespace Innova\LexiconBundle\Command;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Input\InputArgument;
/**
 * Console command "lexicon:populate-cleancontent".
 *
 * Starting from the id given as the "start" argument, loads inflections range
 * by range, stores their cleaned content and records the prefixes of that
 * cleaned content through the InflectionStart repository.
 */
class InflectionCommand extends ContainerAwareCommand
{
    /**
     * Amount added to the range start after each processed batch.
     *
     * NOTE(review): presumably has to equal the width of the id range that
     * findByIdRange() returns — confirm against the repository.
     */
    const BATCH_STEP = 20000;

    /**
     * Registers the command name, description and its required "start" argument.
     */
    protected function configure()
    {
        $this
            ->setName('lexicon:populate-cleancontent')
            ->setDescription('populate clean content')
            ->addArgument('start', InputArgument::REQUIRED, 'start')
        ;
    }

    /**
     * Processes every batch of inflections from the "start" id onwards.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return int Exit code (always 0)
     *
     * @throws \InvalidArgumentException When the "start" argument is not numeric
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $start = $input->getArgument('start');
        if (!is_numeric($start)) {
            throw new \InvalidArgumentException(sprintf('The "start" argument must be numeric, "%s" given.', $start));
        }
        // Console arguments arrive as strings; the value is used in arithmetic below.
        $start = (int) $start;

        $em = $this->getContainer()->get('doctrine')->getEntityManager('default');
        $inflections = $em->getRepository('InnovaLexiconBundle:Inflection')->findByIdRange($start);

        $this->parseInflections($inflections, $start, $output);
        $output->writeln('<info>Done</info>');

        return 0;
    }

    /**
     * Lower-cases a string and replaces a fixed list of accented letters with
     * their unaccented counterpart ("œ" becomes "oe").
     *
     * Not called anywhere in this class.
     *
     * @param string $str
     *
     * @return string
     */
    private function stripAccents($str)
    {
        $replacements = [
            'à' => 'a', 'â' => 'a', 'ä' => 'a', 'á' => 'a', 'ã' => 'a', 'å' => 'a',
            'î' => 'i', 'ï' => 'i', 'ì' => 'i', 'í' => 'i',
            'ô' => 'o', 'ö' => 'o', 'ò' => 'o', 'ó' => 'o', 'õ' => 'o', 'ø' => 'o',
            'ù' => 'u', 'û' => 'u', 'ü' => 'u', 'ú' => 'u',
            'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
            'ç' => 'c', 'ÿ' => 'y', 'ñ' => 'n', 'œ' => 'oe',
        ];

        return str_replace(
            array_keys($replacements),
            array_values($replacements),
            mb_strtolower($str, 'UTF-8')
        );
    }

    /**
     * Processes the given batch and every following one until findByIdRange()
     * returns an empty result.
     *
     * Batches are handled in a loop rather than by recursion so that a
     * finished batch is no longer referenced once the next one is loaded.
     * One progress line is written per processed batch.
     *
     * @param array|\Traversable $inflections First batch, already loaded by the caller
     * @param int                $i           Range start that produced $inflections
     * @param OutputInterface    $output
     */
    private function parseInflections($inflections, $i, $output)
    {
        $em = $this->getContainer()->get('doctrine')->getEntityManager('default');
        $inflectionManager = $this->getContainer()->get('innova_lexicon_inflection');

        while ($inflections) {
            $output->writeln('<info>... ('.$i.')</info>');

            foreach ($inflections as $inflection) {
                $cleanedContent = $inflectionManager->getCleanContent($inflection->getContent());
                $this->populateStart($output, $cleanedContent, $inflection->getLanguage());
                $inflection->setCleanedContent($cleanedContent);
                $em->persist($inflection);
            }

            // Write the whole batch at once, then detach it before loading the next one.
            $em->flush();
            $em->clear();

            $i += self::BATCH_STEP;
            $inflections = $em->getRepository('InnovaLexiconBundle:Inflection')->findByIdRange($i);
        }
    }

    /**
     * Records every prefix of at least two characters of the cleaned content
     * that the InflectionStart repository does not know yet for the language.
     *
     * Prefixes are cut on UTF-8 character boundaries, and the minimum length
     * is checked before escaping so that a lone escaped character is never
     * stored.
     *
     * @param OutputInterface $output         Unused
     * @param string          $cleanedContent Cleaned inflection content
     * @param object          $language       Language entity; only getId() is called
     */
    private function populateStart($output, $cleanedContent, $language)
    {
        $em = $this->getContainer()->get('doctrine')->getEntityManager('default');
        $startRepo = $em->getRepository('InnovaLexiconBundle:InflectionStart');
        $languageId = $language->getId();

        $length = mb_strlen($cleanedContent, 'UTF-8');
        for ($prefixLength = 2; $prefixLength <= $length; ++$prefixLength) {
            // NOTE(review): addslashes() suggests search()/insert() build their
            // SQL by concatenation — if they bind parameters instead, this
            // escaping should be removed. Confirm against the repository.
            $prefix = addslashes(mb_substr($cleanedContent, 0, $prefixLength, 'UTF-8'));

            if (!$startRepo->search($prefix, $languageId)) {
                $startRepo->insert($prefix, $languageId);
            }
        }
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment