Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Pédagogies Multimodales
wikicolor
Commits
a4b9c825
Commit
a4b9c825
authored
Aug 15, 2021
by
Sylvain Coulange
Browse files
fonctionnel pour coloriser fr en zh ; reste à adapter la recherche/modif des dico
parent
96601bc0
Changes
4
Hide whitespace changes
Inline
Side-by-side
colorapp/liaisons.py
View file @
a4b9c825
...
...
@@ -135,7 +135,7 @@ def obtenirM1M2(mot1, mot2):
# print("Le mot2 qui commence par une lettre voyelle : "+mot2)
return
(
mot1
,
mot2
)
# voir si la première lettre de mot2 est "h"
elif
mot2
[
0
].
lower
()
==
'h'
and
mot2
.
lower
()
in
word2trans
and
"*"
not
in
word2trans
[
mot2
.
lower
()][
0
]
:
elif
mot2
[
0
].
lower
()
==
'h'
and
mot2
.
lower
()
in
word2trans
and
"*"
not
in
list
(
word2trans
[
mot2
.
lower
()][
't'
].
keys
())[
0
]
:
# print("Le mot2 qui commence par 'h' muet : " + mot2)
return
(
mot1
,
mot2
)
else
:
...
...
colorapp/textPhonographer.py
View file @
a4b9c825
...
...
@@ -6,6 +6,7 @@ from collections import OrderedDict
locale
.
setlocale
(
locale
.
LC_ALL
,
""
)
from
sys
import
path
as
pylib
# I'm naming it pylib so that we don't confuse os.path with sys.path
pylib
+=
[
os
.
path
.
relpath
(
r
'../phon2graph'
)]
from
phon2graph_french
import
decoupage
from
phon2graph_english
import
decoupageEn
# ENGLISH
...
...
@@ -20,28 +21,32 @@ phonGraphFileEn = "../phon2graph/data/fidel_wikicolor_en_global.scsv" # ENGLISH
pinyin2apiFile
=
"../phon2graph/data/pinyin2api.json"
# MANDARIN
api2classFile
=
"../phon2graph/data/api2class.json"
# MANDARIN
# Anciens dico json ; aujourd'hui on utilise AlemDic (MongoDB)
# dicFileFr = "../wikiphon/dico_frwiktionary-20200301_v2.json"
# dicFileEn = "../wikiphon/dico_enWikiCmuBritfone.json"
# dicFileZh = "../wikiphon/dico_zhCCDict_20201206_v1.json"
logFile
=
"../logs/dico_frwiktionary-20200301_v2.log"
logFileEn
=
"../logs/dico_enWikiCmuBritfone.log"
# ENGLISH
logBugFile
=
"../logs/wikicolor-bug.log"
logBugFileEn
=
"../logs/wikicolorEn-bug.log"
# ENGLISH
########################################
#########
LECTURE DES FICHIERS
#########
#########
# CONNEXION MONGODB ##
#########
########################################
##### LECTURE DU CODE PHONEME-COULEUR #####
with
open
(
phonColFile
,
"r"
)
as
phonFile
:
phon2class
=
json
.
load
(
phonFile
)
##### LECTURE DES DICTIONNAIRES #####
import
pymongo
def connexion():
    """Connect to the AlemDic MongoDB cluster and return the 'alemdic' database.

    The password is read from the file ``../private/dbmdp`` and inserted
    into the ``mongodb+srv://`` connection string for the ``alemadmin`` user.
    The list of database names is printed once the client is created.

    Returns:
        The ``alemdic`` database handle obtained from the MongoClient.

    Raises:
        OSError: if the password file cannot be opened.
    """
    with open('../private/dbmdp', 'r') as infile:
        # A password file saved from an editor usually ends with a newline;
        # left in place it would become part of the password inside the URI.
        mdp = infile.read().rstrip("\r\n")

    print('Connexion à AlemDic...')
    # NOTE(review): the password is inserted as-is. If it can contain URI
    # reserved characters it must be percent-escaped first - confirm whether
    # the stored value is already escaped before adding quoting here.
    mongodb_client = pymongo.MongoClient(
        "mongodb+srv://alemadmin:"
        + mdp
        + "@cluster0.6nopd.mongodb.net/myFirstDatabase?retryWrites=true&w=majority"
    )
    print(mongodb_client.list_database_names())
    return mongodb_client['alemdic']
alemdic
=
connexion
()
dicoFr
=
alemdic
[
'dicoFr'
]
dicoEn
=
alemdic
[
'dicoEn'
]
dicoZh
=
alemdic
[
'dicoZh'
]
dicoFrLogs
=
alemdic
[
'dicoFrLogs'
]
dicoEnLogs
=
alemdic
[
'dicoEnLogs'
]
dicoZhLogs
=
alemdic
[
'dicoZhLogs'
]
##### STATS DICTIONNAIRES
def
getLenDic
(
dic
):
lenDic
=
0
for
k
in
dic
.
keys
():
...
...
@@ -56,23 +61,37 @@ def getLenDicEn():
def getLenDicZh():
    """Return the result of getLenDic applied to the Mandarin dictionary (word2transZh)."""
    zh_dictionary = word2transZh
    return getLenDic(zh_dictionary)
## FR
word2transFr
=
{}
# un mot → liste de trans possibles
with
open
(
dicFileFr
,
'r'
)
as
f
:
word2transFr
=
json
.
load
(
f
)
print
(
"Nombre d'entrées dans le dictionnaire de français :"
,
getLenDic
(
word2transFr
))
##### MISE EN MÉMOIRE DES DICTIONNAIRES (seulement les transcriptions+regions)
def makeWord2trans(alemDic):
    """Load the transcriptions of a dictionary collection into memory.

    Args:
        alemDic: an object whose ``find()`` yields entries, each a mapping
            with a ``'w'`` key (the word) and a ``'t'`` key (its
            transcriptions). In this module it is called with the AlemDic
            collections (dicoFr, dicoEn, dicoZh).

    Returns:
        A dict mapping each word to ``{"t": <transcriptions>}``. When a word
        appears more than once, only the first entry is kept and a
        duplicate warning is printed for each later occurrence.
    """
    word2trans = {}
    for entry in alemDic.find():
        word = entry['w']
        if word in word2trans:
            # Keep the first occurrence; only report the duplicate.
            print('DOUBLON DÉTECTÉ :', word)
            continue
        # No need to keep the rest in memory for now
        # (categories, translations, fantizi...).
        word2trans[word] = {"t": entry['t']}
    return word2trans
## EN
word2transEn
=
{}
# un mot → liste de trans possibles
with
open
(
dicFileEn
,
'r'
)
as
f
:
word2transEn
=
json
.
load
(
f
)
print
(
"Nombre d'entrées dans le dictionnaire d'anglais :"
,
getLenDic
(
word2transEn
))
print
(
"Mise en mémoire de dicoFr..."
)
word2transFr
=
makeWord2trans
(
dicoFr
)
print
(
"Nombre d'entrées dans AlemDic-dicoFr :"
,
getLenDic
(
word2transFr
))
print
(
"Mise en mémoire de dicoEn..."
)
word2transEn
=
makeWord2trans
(
dicoEn
)
print
(
"Nombre d'entrées dans AlemDic-dicoEn :"
,
getLenDic
(
word2transEn
))
## ZH
word2transZh
=
{}
# un mot → une transcription pinyin
with
open
(
dicFileZh
,
'r'
)
as
f
:
word2transZh
=
json
.
load
(
f
)
print
(
"Nombre d'entrées dans le dictionnaire de mandarin :"
,
getLenDic
(
word2transZh
))
print
(
"Mise en mémoire de dicoZh..."
)
word2transZh
=
makeWord2trans
(
dicoZh
)
print
(
"Nombre d'entrées dans AlemDic-dicoZh :"
,
getLenDic
(
word2transZh
))
########################################
######### LECTURE DES FICHIERS #########
########################################
##### LECTURE DU CODE PHONEME-COULEUR #####
with
open
(
phonColFile
,
"r"
)
as
phonFile
:
phon2class
=
json
.
load
(
phonFile
)
##### LECTURE DES LISTES PHONEME-GRAPHIES (FIDEL) #####
...
...
@@ -116,62 +135,25 @@ with open(phonGraphFileEn,mode="r") as phonFileEn:
##### LECTURE DES LOGS #####
## FR
logDicFr
=
{}
with
open
(
logFile
,
'r'
)
as
logf
:
logDicFr
=
json
.
load
(
logf
)
cptEdit
=
0
for
i
,
j
in
logDicFr
.
items
():
for
k
in
j
:
cptEdit
+=
1
print
(
"Nombre de modifications du dictionnaire de français :"
,
cptEdit
)
def
getLogStat
():
def
getLogStat
(
lang
):
if
lang
==
"fr"
:
alemDicLogs
=
dicoFrLogs
if
lang
==
"en"
:
alemDicLogs
=
dicoEnLogs
if
lang
==
"zh"
:
alemDicLogs
=
dicoZhLogs
logStat
=
LogStat
()
logStat
.
cptEdit
=
0
logStat
.
cptModif
=
0
logStat
.
cptAdd
=
0
logStat
.
cptDel
=
0
for
i
,
j
in
logDicFr
.
items
():
for
k
in
j
:
logStat
.
cptEdit
+=
1
if
k
[
'Type'
]
==
"MODIF"
:
logStat
.
cptModif
+=
1
if
k
[
'Type'
]
==
"ADD"
:
logStat
.
cptAdd
+=
1
if
k
[
'Type'
]
==
"DEL"
:
logStat
.
cptDel
+=
1
for
log
in
alemDicLogs
.
find
():
logStat
.
cptEdit
+=
1
if
log
[
'type'
]
==
"MODIF"
:
logStat
.
cptModif
+=
1
if
log
[
'type'
]
==
"ADD"
:
logStat
.
cptAdd
+=
1
if
log
[
'type'
]
==
"DEL"
:
logStat
.
cptDel
+=
1
return
logStat
logBug
=
{}
with
open
(
logBugFile
,
'r'
)
as
logf
:
logBug
=
json
.
load
(
logf
)
print
(
"Nombre de bug d'alignement enregistrés en français :"
,
len
(
logBug
))
## EN
logDicEn
=
{}
with
open
(
logFileEn
,
'r'
)
as
logf
:
logDicEn
=
json
.
load
(
logf
)
cptEditEn
=
0
for
i
,
j
in
logDicEn
.
items
():
for
k
in
j
:
cptEditEn
+=
1
print
(
"Nombre de modifications du dictionnaire d'anglais :"
,
cptEditEn
)
def
getLogStatEn
():
logStatEn
=
LogStat
()
logStatEn
.
cptEdit
=
0
logStatEn
.
cptModif
=
0
logStatEn
.
cptAdd
=
0
logStatEn
.
cptDel
=
0
for
i
,
j
in
logDicEn
.
items
():
for
k
in
j
:
logStatEn
.
cptEdit
+=
1
if
k
[
'Type'
]
==
"MODIF"
:
logStatEn
.
cptModif
+=
1
if
k
[
'Type'
]
==
"ADD"
:
logStatEn
.
cptAdd
+=
1
if
k
[
'Type'
]
==
"DEL"
:
logStatEn
.
cptDel
+=
1
return
logStatEn
logBugEn
=
{}
with
open
(
logBugFileEn
,
'r'
)
as
logf
:
logBugEn
=
json
.
load
(
logf
)
print
(
"Nombre de bug d'alignement enregistrés en anglais :"
,
len
(
logBugEn
))
print
(
"Nombre de modifications du dictionnaire de français :"
,
getLogStat
(
'fr'
).
cptEdit
)
print
(
"Nombre de modifications du dictionnaire d'anglais :"
,
getLogStat
(
'en'
).
cptEdit
)
print
(
"Nombre de modifications du dictionnaire de mandarin :"
,
getLogStat
(
'zh'
).
cptEdit
)
##### SUPPLEMENTS POUR MANDARIN #####
with
open
(
pinyin2apiFile
)
as
inFile
:
...
...
@@ -226,12 +208,12 @@ def traitement(mot, lang, liaison): # LIAISON : avec le caractere liaison en arg
elif
mot
in
word2trans
.
keys
():
print
(
"'"
,
mot
,
"' trouvé dans le dico !"
,
word2trans
[
mot
])
if
lang
==
"fr"
:
transList
=
word2trans
[
mot
]
elif
lang
==
"en"
:
if
lang
in
[
"fr"
,
"en"
]:
transList
=
[]
for
trans
,
locs
in
word2trans
[
mot
].
items
():
transList
.
append
((
trans
,
locs
))
for
trans
,
infos
in
word2trans
[
mot
][
't'
].
items
():
transList
.
append
((
trans
.
replace
(
"_"
,
""
),
infos
[
'r'
]))
# SUPPRESSION SYMBOLE SYLLABE ICI '_'
print
(
"TRANS"
,
trans
,
trans
.
replace
(
"_"
,
""
))
print
(
"LOCS"
,
infos
[
'r'
])
result
=
[]
############ partie d'appel de la fonction denasalisation
...
...
@@ -245,19 +227,17 @@ def traitement(mot, lang, liaison): # LIAISON : avec le caractere liaison en arg
for
trans
in
transList
:
if
lang
==
"fr"
:
res
=
decoupage
(
mot
,
trans
,
phon2graph
,
phon2class
)
ll
=
"Fr"
tt
=
trans
res
=
decoupage
(
mot
,
trans
[
0
]
,
phon2graph
,
phon2class
)
ll
=
trans
[
1
]
tt
=
trans
[
0
]
msg
=
""
elif
lang
==
"en"
:
mot
=
mot
.
replace
(
"'"
,
'’'
)
res
,
msg
=
decoupageEn
(
mot
,
trans
[
0
],
phon2graph
,
phon2class
,
True
)
#
True to get live log
res
,
msg
=
decoupageEn
(
mot
,
trans
[
0
],
phon2graph
,
phon2class
)
# add ,
True to get live log
ll
=
trans
[
1
]
tt
=
trans
[
0
]
result
.
append
((
res
,
ll
,
tt
,
msg
))
for
graphie
in
res
:
if
re
.
match
(
r
'phon\_echec'
,
graphie
[
0
]):
writeLogBugs
(
mot
,
lang
)
else
:
print
(
"'"
,
mot
,
"' non trouvé !"
)
result
=
[([(
'phon_inconnu'
,
mot
)],
""
,
""
,
"Mot non trouvé dans le dictionnaire"
)]
...
...
@@ -282,7 +262,7 @@ def traitementzh(mot):
result
=
[]
# liste type : [[car, api, phonlist, ton], [car, api, phonlist, ton]...]
if
mot
in
word2transZh
.
keys
():
pinyinOutput
=
word2transZh
[
mot
][
"
p"
].
lower
()
pinyinOutput
=
list
(
word2transZh
[
mot
][
"
t"
].
keys
())[
0
].
lower
()
print
(
"Mot trouvé dans le dictionnaire :"
,
mot
,
pinyinOutput
)
else
:
...
...
@@ -344,7 +324,7 @@ def getEntryByWord(m,mc,t,tc,lang):
for
entree
,
transList
in
word2trans
.
items
():
if
re
.
match
(
mc1
+
m
+
mc2
,
entree
):
transOK
=
False
for
i
,
trans
in
enumerate
(
transList
):
for
i
,
trans
in
enumerate
(
list
(
transList
[
't'
].
keys
())
):
if
re
.
match
(
tc1
+
t
+
tc2
,
trans
.
replace
(
'.'
,
''
)):
transOK
=
True
if
transOK
:
result
[
entree
]
=
transList
...
...
colorapp/views.py
View file @
a4b9c825
...
...
@@ -5,9 +5,14 @@ from django.http import JsonResponse
from
.liaisons
import
*
import
json
,
spacy
,
subprocess
,
re
print
(
'Chargement des modèles de langue...'
)
print
(
'fr_core_news_md...'
)
nlpFr
=
spacy
.
load
(
'fr_core_news_md'
)
print
(
'en_core_web_sm...'
)
nlpEn
=
spacy
.
load
(
"en_core_web_sm"
)
print
(
'zh_core_web_sm...'
)
nlpZh
=
spacy
.
load
(
"zh_core_web_sm"
)
print
(
'OK.'
)
logFile
=
"../logs/dico_frwiktionary-20200301_v2.log"
def
redirApp
(
request
):
...
...
@@ -233,7 +238,7 @@ def dicoViewFr(request):
data
=
Entree
()
data
.
update
=
updateTime
()
data
.
lenDic
=
txtphono
.
getLenDicFr
()
data
.
logStat
=
txtphono
.
getLogStat
()
data
.
logStat
=
txtphono
.
getLogStat
(
'fr'
)
data
.
dicoLang
=
"fr"
return
render
(
request
,
'editDico.html'
,
{
'data'
:
data
})
...
...
@@ -241,7 +246,7 @@ def dicoViewEn(request):
data
=
Entree
()
data
.
update
=
updateTime
()
data
.
lenDic
=
txtphono
.
getLenDicEn
()
data
.
logStat
=
txtphono
.
getLogStat
En
(
)
data
.
logStat
=
txtphono
.
getLogStat
(
'en'
)
data
.
dicoLang
=
"en"
return
render
(
request
,
'editDico.html'
,
{
'data'
:
data
})
...
...
templates/editDico.html
View file @
a4b9c825
...
...
@@ -59,7 +59,7 @@
</tr>
</table>
<center><div
id=
"loader"
class=
"loader"
style=
"display: none;"
></div></center>
<button
id=
"btnShowMore"
class=
"btn btn-success"
style=
"margin: 10px;display: none;"
title=
"afficher plus de résultats"
><svg
xmlns=
"http://www.w3.org/2000/svg"
width=
"30"
height=
"30"
fill=
"
green
"
class=
"bi bi-plus-circle"
viewBox=
"0 0 16 16"
>
<path
d=
"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"
/>
<path
d=
"M8 4a.5.5 0 0 1 .5.5v3h3a.5.5 0 0 1 0 1h-3v3a.5.5 0 0 1-1 0v-3h-3a.5.5 0 0 1 0-1h3v-3A.5.5 0 0 1 8 4z"
/></svg></button>
<button
id=
"btnShowMore"
class=
"btn btn-success"
style=
"margin: 10px;display: none;
color:white
"
title=
"afficher plus de résultats"
><svg
xmlns=
"http://www.w3.org/2000/svg"
width=
"30"
height=
"30"
fill=
"
white
"
class=
"bi bi-plus-circle"
viewBox=
"0 0 16 16"
>
<path
d=
"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"
/>
<path
d=
"M8 4a.5.5 0 0 1 .5.5v3h3a.5.5 0 0 1 0 1h-3v3a.5.5 0 0 1-1 0v-3h-3a.5.5 0 0 1 0-1h3v-3A.5.5 0 0 1 8 4z"
/></svg></button>
<!-- <button id="btnAddEntry" class="btn btn-primary" style="margin: 10px;" title="Ajouter une entrée">Ajouter une entrée</button> -->
<div
id=
"actionBlock"
style=
"display: none;"
><div
class=
"loader mainLoad"
></div></div>
<div
class=
"divAddEntree"
>
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment