Commit a4b9c825 authored by Sylvain Coulange
Browse files

fonctionnel pour coloriser fr en zh ; reste à adapter la recherche/modif des dico

parent 96601bc0
......@@ -135,7 +135,7 @@ def obtenirM1M2(mot1, mot2):
# print("Le mot2 qui commence par une lettre voyelle : "+mot2)
return (mot1, mot2)
# voir si la première lettre de mot2 est "h"
elif mot2[0].lower() == 'h' and mot2.lower() in word2trans and "*" not in word2trans[mot2.lower()][0] :
elif mot2[0].lower() == 'h' and mot2.lower() in word2trans and "*" not in list(word2trans[mot2.lower()]['t'].keys())[0] :
# print("Le mot2 qui commence par 'h' muet : " + mot2)
return (mot1, mot2)
else:
......
......@@ -6,6 +6,7 @@ from collections import OrderedDict
locale.setlocale(locale.LC_ALL, "")
from sys import path as pylib #im naming it as pylib so that we won't get confused between os.path and sys.path
pylib += [os.path.relpath(r'../phon2graph')]
from phon2graph_french import decoupage
from phon2graph_english import decoupageEn # ENGLISH
......@@ -20,28 +21,32 @@ phonGraphFileEn = "../phon2graph/data/fidel_wikicolor_en_global.scsv" # ENGLISH
pinyin2apiFile = "../phon2graph/data/pinyin2api.json" # MANDARIN
api2classFile = "../phon2graph/data/api2class.json" # MANDARIN
# Anciens dico json ; aujourd'hui on utilise AlemDic (MongoDB)
# dicFileFr = "../wikiphon/dico_frwiktionary-20200301_v2.json"
# dicFileEn = "../wikiphon/dico_enWikiCmuBritfone.json"
# dicFileZh = "../wikiphon/dico_zhCCDict_20201206_v1.json"
logFile = "../logs/dico_frwiktionary-20200301_v2.log"
logFileEn = "../logs/dico_enWikiCmuBritfone.log" # ENGLISH
logBugFile = "../logs/wikicolor-bug.log"
logBugFileEn = "../logs/wikicolorEn-bug.log" # ENGLISH
########################################
######### LECTURE DES FICHIERS #########
########## CONNEXION MONGODB ###########
########################################
##### LECTURE DU CODE PHONEME-COULEUR #####
with open(phonColFile,"r") as phonFile:
phon2class = json.load(phonFile)
##### LECTURE DES DICTIONNAIRES #####
import pymongo
def connexion():
    """Connect to the AlemDic MongoDB cluster and return its database handle.

    The password is read from ``../private/dbmdp`` (relative to the current
    working directory) and inserted into the ``mongodb+srv`` connection URI.
    The names of the databases visible to the account are printed once the
    client has been created.

    Returns:
        The ``alemdic`` database object of the created ``pymongo.MongoClient``.
    """
    with open('../private/dbmdp', 'r') as infile:
        # Strip surrounding whitespace: a trailing newline left in the secret
        # file would otherwise become part of the credentials in the URI.
        mdp = infile.read().strip()
    # NOTE(review): the password is inserted verbatim, so reserved characters
    # (':', '/', '@', '%', ...) are presumably already percent-escaped in the
    # file -- confirm before adding escaping here.
    print('Connexion à AlemDic...')
    mongodb_client = pymongo.MongoClient(
        "mongodb+srv://alemadmin:" + mdp
        + "@cluster0.6nopd.mongodb.net/myFirstDatabase?retryWrites=true&w=majority"
    )
    print(mongodb_client.list_database_names())
    return mongodb_client['alemdic']
alemdic = connexion()
dicoFr = alemdic['dicoFr']
dicoEn = alemdic['dicoEn']
dicoZh = alemdic['dicoZh']
dicoFrLogs = alemdic['dicoFrLogs']
dicoEnLogs = alemdic['dicoEnLogs']
dicoZhLogs = alemdic['dicoZhLogs']
##### STATS DICTIONNAIRES
def getLenDic(dic):
lenDic = 0
for k in dic.keys():
......@@ -56,23 +61,37 @@ def getLenDicEn():
def getLenDicZh():
    """Return the size of the in-memory Mandarin dictionary, as computed by getLenDic."""
    mandarinEntries = word2transZh
    return getLenDic(mandarinEntries)
## FR
word2transFr = {} # un mot → liste de trans possibles
with open(dicFileFr, 'r') as f:
word2transFr = json.load(f)
print("Nombre d'entrées dans le dictionnaire de français :", getLenDic(word2transFr))
##### MISE EN MÉMOIRE DES DICTIONNAIRES (seulement les transcriptions+regions)
def makeWord2trans(alemDic):
    """Load one dictionary collection into memory, keeping only the transcriptions.

    Args:
        alemDic: collection-like object whose ``find(filter, projection)``
            yields documents carrying a ``'w'`` field (used as the key) and a
            ``'t'`` field (stored as the value).

    Returns:
        dict mapping each ``'w'`` value to ``{"t": <the document's 't' value>}``.
        When several documents share the same ``'w'``, the first one is kept
        and a message is printed for every later one.
    """
    word2trans = {}
    # Only 'w' and 't' are read below (categories, translations, etc. are not
    # needed yet), so request just those two fields instead of whole documents.
    for entry in alemDic.find({}, {'w': 1, 't': 1}):
        word = entry['w']
        if word in word2trans:
            print('DOUBLON DÉTECTÉ :', word)
            continue
        word2trans[word] = {"t": entry['t']}
    return word2trans
## EN
word2transEn = {} # un mot → liste de trans possibles
with open(dicFileEn, 'r') as f:
word2transEn = json.load(f)
print("Nombre d'entrées dans le dictionnaire d'anglais :", getLenDic(word2transEn))
print("Mise en mémoire de dicoFr...")
word2transFr = makeWord2trans(dicoFr)
print("Nombre d'entrées dans AlemDic-dicoFr :", getLenDic(word2transFr))
print("Mise en mémoire de dicoEn...")
word2transEn = makeWord2trans(dicoEn)
print("Nombre d'entrées dans AlemDic-dicoEn :", getLenDic(word2transEn))
## ZH
word2transZh = {} # un mot → une transcription pinyin
with open(dicFileZh, 'r') as f:
word2transZh = json.load(f)
print("Nombre d'entrées dans le dictionnaire de mandarin :", getLenDic(word2transZh))
print("Mise en mémoire de dicoZh...")
word2transZh = makeWord2trans(dicoZh)
print("Nombre d'entrées dans AlemDic-dicoZh :", getLenDic(word2transZh))
########################################
######### LECTURE DES FICHIERS #########
########################################
##### LECTURE DU CODE PHONEME-COULEUR #####
with open(phonColFile,"r") as phonFile:
phon2class = json.load(phonFile)
##### LECTURE DES LISTES PHONEME-GRAPHIES (FIDEL) #####
......@@ -116,62 +135,25 @@ with open(phonGraphFileEn,mode="r") as phonFileEn:
##### LECTURE DES LOGS #####
## FR
logDicFr = {}
with open(logFile, 'r') as logf:
logDicFr = json.load(logf)
cptEdit = 0
for i,j in logDicFr.items():
for k in j:
cptEdit+=1
print("Nombre de modifications du dictionnaire de français :", cptEdit)
def getLogStat(lang="fr"):
    """Count the edit-log entries of one dictionary, in total and per type.

    Args:
        lang: language code of the dictionary whose log collection is read:
            "fr", "en" or "zh". Defaults to "fr" so that callers of the
            earlier zero-argument version, which counted the French log,
            keep working.

    Returns:
        A ``LogStat`` whose ``cptEdit`` is the number of log documents and
        whose ``cptModif`` / ``cptAdd`` / ``cptDel`` are the numbers of
        documents with ``'type'`` equal to "MODIF" / "ADD" / "DEL".

    Raises:
        ValueError: if ``lang`` is not one of the supported codes (previously
            this surfaced as an UnboundLocalError on ``alemDicLogs``).
    """
    logsByLang = {"fr": dicoFrLogs, "en": dicoEnLogs, "zh": dicoZhLogs}
    if lang not in logsByLang:
        raise ValueError("Unsupported language code for getLogStat: %r" % (lang,))
    alemDicLogs = logsByLang[lang]

    logStat = LogStat()
    logStat.cptEdit = 0
    logStat.cptModif = 0
    logStat.cptAdd = 0
    logStat.cptDel = 0
    for log in alemDicLogs.find():
        logStat.cptEdit += 1
        logType = log['type']
        if logType == "MODIF":
            logStat.cptModif += 1
        elif logType == "ADD":
            logStat.cptAdd += 1
        elif logType == "DEL":
            logStat.cptDel += 1
    return logStat
logBug = {}
with open(logBugFile, 'r') as logf:
logBug = json.load(logf)
print("Nombre de bug d'alignement enregistrés en français :", len(logBug))
## EN
logDicEn = {}
with open(logFileEn, 'r') as logf:
logDicEn = json.load(logf)
cptEditEn = 0
for i,j in logDicEn.items():
for k in j:
cptEditEn+=1
print("Nombre de modifications du dictionnaire d'anglais :", cptEditEn)
def getLogStatEn():
logStatEn = LogStat()
logStatEn.cptEdit = 0
logStatEn.cptModif = 0
logStatEn.cptAdd = 0
logStatEn.cptDel = 0
for i,j in logDicEn.items():
for k in j:
logStatEn.cptEdit+=1
if k['Type'] == "MODIF" : logStatEn.cptModif += 1
if k['Type'] == "ADD" : logStatEn.cptAdd += 1
if k['Type'] =="DEL" : logStatEn.cptDel += 1
return logStatEn
logBugEn = {}
with open(logBugFileEn, 'r') as logf:
logBugEn = json.load(logf)
print("Nombre de bug d'alignement enregistrés en anglais :", len(logBugEn))
print("Nombre de modifications du dictionnaire de français :", getLogStat('fr').cptEdit)
print("Nombre de modifications du dictionnaire d'anglais :", getLogStat('en').cptEdit)
print("Nombre de modifications du dictionnaire de mandarin :", getLogStat('zh').cptEdit)
##### SUPPLEMENTS POUR MANDARIN #####
with open(pinyin2apiFile) as inFile:
......@@ -226,12 +208,12 @@ def traitement(mot, lang, liaison): # LIAISON : avec le caractere liaison en arg
elif mot in word2trans.keys():
print("'", mot, "' trouvé dans le dico !",word2trans[mot])
if lang == "fr":
transList = word2trans[mot]
elif lang == "en":
if lang in ["fr","en"]:
transList = []
for trans, locs in word2trans[mot].items():
transList.append((trans,locs))
for trans, infos in word2trans[mot]['t'].items():
transList.append((trans.replace("_",""),infos['r'])) # SUPPRESSION SYMBOLE SYLLABE ICI '_'
print("TRANS",trans, trans.replace("_",""))
print("LOCS", infos['r'])
result = []
############ partie d'appel de la fonction denasalisation
......@@ -245,19 +227,17 @@ def traitement(mot, lang, liaison): # LIAISON : avec le caractere liaison en arg
for trans in transList:
if lang == "fr":
res = decoupage(mot,trans,phon2graph,phon2class)
ll = "Fr"
tt = trans
res = decoupage(mot,trans[0],phon2graph,phon2class)
ll = trans[1]
tt = trans[0]
msg = ""
elif lang == "en":
mot = mot.replace("'",'’')
res, msg = decoupageEn(mot,trans[0],phon2graph,phon2class,True) # True to get live log
res, msg = decoupageEn(mot,trans[0],phon2graph,phon2class) # add ,True to get live log
ll = trans[1]
tt = trans[0]
result.append((res,ll,tt,msg))
for graphie in res:
if re.match(r'phon\_echec',graphie[0]):
writeLogBugs(mot, lang)
else:
print("'", mot, "' non trouvé !")
result = [([('phon_inconnu',mot)],"","","Mot non trouvé dans le dictionnaire")]
......@@ -282,7 +262,7 @@ def traitementzh(mot):
result = [] # liste type : [[car, api, phonlist, ton], [car, api, phonlist, ton]...]
if mot in word2transZh.keys():
pinyinOutput = word2transZh[mot]["p"].lower()
pinyinOutput = list(word2transZh[mot]["t"].keys())[0].lower()
print("Mot trouvé dans le dictionnaire :", mot, pinyinOutput)
else:
......@@ -344,7 +324,7 @@ def getEntryByWord(m,mc,t,tc,lang):
for entree,transList in word2trans.items():
if re.match(mc1+m+mc2,entree):
transOK = False
for i,trans in enumerate(transList):
for i,trans in enumerate(list(transList['t'].keys())):
if re.match(tc1+t+tc2,trans.replace('.','')):
transOK = True
if transOK: result[entree] = transList
......
......@@ -5,9 +5,14 @@ from django.http import JsonResponse
from .liaisons import *
import json, spacy, subprocess, re
print('Chargement des modèles de langue...')
print('fr_core_news_md...')
nlpFr = spacy.load('fr_core_news_md')
print('en_core_web_sm...')
nlpEn = spacy.load("en_core_web_sm")
print('zh_core_web_sm...')
nlpZh = spacy.load("zh_core_web_sm")
print('OK.')
logFile = "../logs/dico_frwiktionary-20200301_v2.log"
def redirApp(request):
......@@ -233,7 +238,7 @@ def dicoViewFr(request):
data = Entree()
data.update = updateTime()
data.lenDic = txtphono.getLenDicFr()
data.logStat = txtphono.getLogStat()
data.logStat = txtphono.getLogStat('fr')
data.dicoLang = "fr"
return render(request, 'editDico.html', {'data': data})
......@@ -241,7 +246,7 @@ def dicoViewEn(request):
data = Entree()
data.update = updateTime()
data.lenDic = txtphono.getLenDicEn()
data.logStat = txtphono.getLogStatEn()
data.logStat = txtphono.getLogStat('en')
data.dicoLang = "en"
return render(request, 'editDico.html', {'data': data})
......
......@@ -59,7 +59,7 @@
</tr>
</table>
<center><div id="loader" class="loader" style="display: none;"></div></center>
<button id="btnShowMore" class="btn btn-success" style="margin: 10px;display: none;" title="afficher plus de résultats"><svg xmlns="http://www.w3.org/2000/svg" width="30" height="30" fill="green" class="bi bi-plus-circle" viewBox="0 0 16 16"> <path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/> <path d="M8 4a.5.5 0 0 1 .5.5v3h3a.5.5 0 0 1 0 1h-3v3a.5.5 0 0 1-1 0v-3h-3a.5.5 0 0 1 0-1h3v-3A.5.5 0 0 1 8 4z"/></svg></button>
<button id="btnShowMore" class="btn btn-success" style="margin: 10px;display: none; color:white" title="afficher plus de résultats"><svg xmlns="http://www.w3.org/2000/svg" width="30" height="30" fill="white" class="bi bi-plus-circle" viewBox="0 0 16 16"> <path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/> <path d="M8 4a.5.5 0 0 1 .5.5v3h3a.5.5 0 0 1 0 1h-3v3a.5.5 0 0 1-1 0v-3h-3a.5.5 0 0 1 0-1h3v-3A.5.5 0 0 1 8 4z"/></svg></button>
<!-- <button id="btnAddEntry" class="btn btn-primary" style="margin: 10px;" title="Ajouter une entrée">Ajouter une entrée</button> -->
<div id="actionBlock" style="display: none;"><div class="loader mainLoad"></div></div>
<div class="divAddEntree">
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment