Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Pédagogies Multimodales
wikicolor
Commits
b44ad433
Commit
b44ad433
authored
Dec 07, 2020
by
Sylvain Coulange
Browse files
Ajout mandarin
parent
3ebe17b4
Changes
12
Hide whitespace changes
Inline
Side-by-side
REQUIREMENTS.txt
View file @
b44ad433
asgiref==3.2.10
blis==0.4.1
catalogue==0.0.8
certifi==2019.11.28
catalogue==1.0.0
certifi==2020.4.5.1
chardet==3.0.4
cymem==2.0.3
Django==2.1
fr-core-news-md==2.2.5
fr-core-news-sm==2.2.5
idna==2.8
importlib-metadata==1.3.0
more-itertools==8.0.2
Cython==0.29.21
Django==3.1.2
django-cors-headers==3.5.0
dnspython==2.0.0
en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.3.1/en_core_web_md-2.3.1.tar.gz
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
fr-core-news-md @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-2.3.0/fr_core_news_md-2.3.0.tar.gz
fr-core-news-sm @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.2.5/fr_core_news_sm-2.2.5.tar.gz
idna==2.9
importlib-metadata==1.6.0
jieba==0.42.1
murmurhash==1.0.2
numpy==1.18.0
numpy==1.18.4
pinyin==0.4.0
pkuseg==0.0.25
plac==1.1.3
preshed==3.0.2
pymongo==3.11.0
pytz==2019.3
requests==2.22.0
spacy==2.2.3
srsly==0.2.0
thinc==7.3.1
tqdm==4.41.0
urllib3==1.25.7
wasabi==0.4.2
zipp==0.6.0
requests==2.23.0
spacy==2.3.4
sqlparse==0.4.1
srsly==1.0.2
thinc==7.4.3
tqdm==4.46.0
urllib3==1.25.9
wasabi==0.6.0
zh-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-2.3.1/zh_core_web_sm-2.3.1.tar.gz
zipp==3.1.0
colorapp/textPhonographer.py
View file @
b44ad433
# -*- encoding:utf8 -*-
######### phon2graph.py #########
#
# Version Python 3.7.1
#
# Prend en entrée :
# - dictionnaire phonème→couleur (phoneme-couleur.csv)
# - dictionnaire phonème→graphies (phoneme-graphies_fr.scsv)
# - dictionnaire phonétisé (dico_frwiki.csv)
#
# cf. README pour explications détaillées
import
re
import
sys
import
os
import
datetime
import
json
import
re
,
sys
,
os
,
datetime
,
json
,
tempfile
,
csv
,
locale
,
pinyin
from
colorapp.models
import
Entree
,
DicEntry
,
LogStat
import
tempfile
import
csv
from
collections
import
OrderedDict
import
locale
locale
.
setlocale
(
locale
.
LC_ALL
,
""
)
from
sys
import
path
as
pylib
# Imported under the name pylib so that sys.path is not confused with os.path
pylib
+=
[
os
.
path
.
relpath
(
r
'../phon2graph'
)]
from
phon2graph
import
decoupage
from
phon2graph
_french
import
decoupage
from
phon2graph_english
import
decoupageEn
# ENGLISH
from
phon2graph_mandarin
import
pinyin2phon
# MANDARIN CHINESE
# FICHIERS
phonColFile
=
"../phon2graph/data/api2class.json"
phonGraphFile
=
"../phon2graph/data/fidel_wikicolor.scsv"
# "../phon2graph/data/phoneme-graphies_fr.scsv"
phonGraphFileEn
=
"../phon2graph/data/fidel_wikicolor_en_global.scsv"
# ENGLISH
pinyin2apiFile
=
"../phon2graph/data/pinyin2api.json"
# MANDARIN
api2classFile
=
"../phon2graph/data/api2class.json"
# MANDARIN
dicFile
=
"../wikiphon/dico_frwiktionary-20200301_v2.json"
#dicFileEn = "../wikiphon/dico_enWiktionary-20200704_v1.json" # ENGLISH
dicFileEn
=
"../wikiphon/dico_enWikiCmu.json"
dicFileZh
=
"../wikiphon/dico_zhCCDict_20201206_v1.json"
logFile
=
"../logs/dico_frwiktionary-20200301_v2.log"
logFileEn
=
"../logs/dico_enWiktionary-20200704_v1.log"
# ENGLISH
...
...
@@ -44,47 +29,84 @@ logFileEn = "../logs/dico_enWiktionary-20200704_v1.log" # ENGLISH
logBugFile
=
"../logs/wikicolor-bug.log"
logBugFileEn
=
"../logs/wikicolorEn-bug.log"
# ENGLISH
# LECTURE DU CODE PHONEME-COULEUR
########################################
######### LECTURE DES FICHIERS #########
########################################
##### LECTURE DU CODE PHONEME-COULEUR #####
# Load the phoneme -> colour-class mapping from phonColFile.
# JSON is UTF-8 by specification, so the encoding is stated explicitly rather
# than inherited from the process locale (this module calls locale.setlocale).
with open(phonColFile, "r", encoding="utf-8") as phonFile:
    phon2class = json.load(phonFile)
# LECTURE DE LA LISTE PHONEME-GRAPHIES (FIDEL)
phonFile
=
open
(
phonGraphFile
,
mode
=
"r"
)
phon2graphFr
=
{}
phonCpt
=
0
graphCpt
=
0
##### LECTURE DES DICTIONNAIRES #####
def getLenDic(dic):
    """Return the number of entries in ``dic``.

    Args:
        dic: A dictionary (any sized container is accepted).

    Returns:
        The number of keys in ``dic`` as an int.
    """
    # len() gives the same count as iterating over the keys one by one
    return len(dic)
for
line
in
phonFile
:
phonCpt
+=
1
line
=
line
.
strip
()
l
=
line
.
split
(
':'
)
## FR
# French dictionary: one word -> list of possible transcriptions.
# The encoding is explicit so that non-ASCII entries decode the same way
# whatever the process locale is.
with open(dicFile, "r", encoding="utf-8") as f:
    word2transFr = json.load(f)
print("Nombre d'entrées dans le dictionnaire de français :", getLenDic(word2transFr))
phon2graphFr
[
l
[
0
]]
=
[]
## EN
# English dictionary: one word -> list of possible transcriptions.
# The encoding is explicit so that non-ASCII entries decode the same way
# whatever the process locale is.
with open(dicFileEn, "r", encoding="utf-8") as f:
    word2transEn = json.load(f)
print("Nombre d'entrées dans le dictionnaire d'anglais :", getLenDic(word2transEn))
listegraphies
=
l
[
1
].
split
(
','
)
for
graph
in
listegraphies
:
phon2graphFr
[
l
[
0
]].
append
(
graph
.
replace
(
"'"
,
"’"
))
graphCpt
+=
1
## ZH
# Mandarin dictionary: one word -> one pinyin transcription
# (traitementzh reads it from the "p" field of each entry).
# The encoding is explicit so that the keys decode the same way whatever
# the process locale is.
with open(dicFileZh, "r", encoding="utf-8") as f:
    word2transZh = json.load(f)
print("Nombre d'entrées dans le dictionnaire de mandarin :", getLenDic(word2transZh))
phonFile
.
close
()
##### LECTURE DES LISTES PHONEME-GRAPHIES (FIDEL) #####
# LECTURE DU DICTIONNAIRE
word2transFr
=
{}
# un mot → liste de trans possibles
with
open
(
dicFile
,
'r'
)
as
f
:
word2transFr
=
json
.
load
(
f
)
lenDic
=
0
for
k
in
word2transFr
.
keys
():
lenDic
+=
1
print
(
'len frwiki :'
,
lenDic
)
def getLenDic():
    """Return the number of entries in the French dictionary ``word2transFr``."""
    # len() gives the same count as iterating over the keys one by one
    return len(word2transFr)
## FR
# Parse the French phoneme -> graphemes list (fidel).
# Each line is read as "<phoneme>:<grapheme>,<grapheme>,...".
with open(phonGraphFile, mode="r", encoding="utf-8") as phonFile:
    phon2graphFr = {}  # phoneme -> list of graphemes
    phonCpt = 0        # number of phoneme lines read
    graphCpt = 0       # total number of graphemes read
    for line in phonFile:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at end of file) holds no
            # entry; indexing l[1] on it used to raise IndexError.
            continue
        phonCpt += 1
        l = line.split(':')
        phon2graphFr[l[0]] = []
        listegraphies = l[1].split(',')
        for graph in listegraphies:
            # straight apostrophes are normalised to typographic ones
            phon2graphFr[l[0]].append(graph.replace("'", "’"))
            graphCpt += 1
## EN
# Parse the English phoneme -> graphemes list (fidel).
# Each line is read as "<phoneme>:<grapheme>,<grapheme>,...".
with open(phonGraphFileEn, mode="r", encoding="utf-8") as phonFileEn:
    phon2graphEn = {}  # phoneme -> list of graphemes
    phonCptEn = 0      # number of phoneme lines read
    graphCptEn = 0     # total number of graphemes read
    for line in phonFileEn:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at end of file) holds no
            # entry; indexing l[1] on it used to raise IndexError.
            continue
        phonCptEn += 1
        l = line.split(':')
        phon2graphEn[l[0]] = []
        listegraphies = l[1].split(',')
        for graph in listegraphies:
            # straight apostrophes are normalised to typographic ones
            phon2graphEn[l[0]].append(graph.replace("'", "’"))
            graphCptEn += 1
##### LECTURE DES LOGS #####
## FR
# French dictionary edit log (JSON); decoded as UTF-8 regardless of locale.
with open(logFile, "r", encoding="utf-8") as logf:
    logDicFr = json.load(logf)
...
...
@@ -92,7 +114,7 @@ cptEdit = 0
for
i
,
j
in
logDicFr
.
items
():
for
k
in
j
:
cptEdit
+=
1
print
(
"Nombre de modifications du dictionnaire :"
,
cptEdit
)
print
(
"Nombre de modifications du dictionnaire
de français
:"
,
cptEdit
)
def
getLogStat
():
logStat
=
LogStat
()
logStat
.
cptEdit
=
0
...
...
@@ -110,46 +132,9 @@ def getLogStat():
logBug
=
{}
with
open
(
logBugFile
,
'r'
)
as
logf
:
logBug
=
json
.
load
(
logf
)
print
(
"Nombre de bug d'alignement enregistrés :"
,
len
(
logBug
))
#############################
##### FICHIERS POUR L'ANGLAIS
# LECTURE DE LA LISTE PHONEME-GRAPHIES (FIDEL) --ENGLISH--
phonFileEn
=
open
(
phonGraphFileEn
,
mode
=
"r"
)
phon2graphEn
=
{}
phonCptEn
=
0
graphCptEn
=
0
for
line
in
phonFileEn
:
phonCptEn
+=
1
line
=
line
.
strip
()
l
=
line
.
split
(
':'
)
print
(
"Nombre de bug d'alignement enregistrés en français :"
,
len
(
logBug
))
phon2graphEn
[
l
[
0
]]
=
[]
listegraphies
=
l
[
1
].
split
(
','
)
for
graph
in
listegraphies
:
phon2graphEn
[
l
[
0
]].
append
(
graph
.
replace
(
"'"
,
"’"
))
graphCptEn
+=
1
phonFileEn
.
close
()
# LECTURE DU DICTIONNAIRE --ENGLISH--
word2transEn
=
{}
# un mot → liste de trans possibles
with
open
(
dicFileEn
,
'r'
)
as
f
:
word2transEn
=
json
.
load
(
f
)
lenDicEn
=
0
for
k
in
word2transEn
.
keys
():
lenDicEn
+=
1
print
(
'len enwiki :'
,
lenDicEn
)
def getLenDicEn():
    """Return the number of entries in the English dictionary ``word2transEn``."""
    # len() gives the same count as iterating over the keys one by one
    return len(word2transEn)
# LECTURE DES LOG --ENGLISH--
## EN
# English dictionary edit log (JSON); decoded as UTF-8 regardless of locale.
with open(logFileEn, "r", encoding="utf-8") as logf:
    logDicEn = json.load(logf)
...
...
@@ -157,7 +142,7 @@ cptEditEn = 0
for
i
,
j
in
logDicEn
.
items
():
for
k
in
j
:
cptEditEn
+=
1
print
(
"
(ENGLISH)
Nombre de modifications du dictionnaire :"
,
cptEditEn
)
print
(
"Nombre de modifications du dictionnaire
d'anglais
:"
,
cptEditEn
)
def
getLogStatEn
():
logStatEn
=
LogStat
()
logStatEn
.
cptEdit
=
0
...
...
@@ -175,10 +160,22 @@ def getLogStatEn():
logBugEn
=
{}
with
open
(
logBugFileEn
,
'r'
)
as
logf
:
logBugEn
=
json
.
load
(
logf
)
print
(
"
(ENGLISH)
Nombre de bug d'alignement enregistrés :"
,
len
(
logBugEn
))
print
(
"Nombre de bug d'alignement enregistrés
en anglais
:"
,
len
(
logBugEn
))
def
mimi
(
mot
,
lang
):
##### SUPPLEMENTS POUR MANDARIN #####
# Mandarin resources, both passed to pinyin2phon() by traitementzh().
# JSON is UTF-8 by specification, so the encoding is stated explicitly
# rather than inherited from the process locale.
with open(pinyin2apiFile, encoding="utf-8") as inFile:
    pinyin2api = json.load(inFile)
with open(api2classFile, encoding="utf-8") as inFile:
    api2class = json.load(inFile)
########################################
######### LISTE DES FONCTIONS ##########
########################################
def
traitement
(
mot
,
lang
):
if
lang
==
"fr"
:
word2trans
=
word2transFr
phon2graph
=
phon2graphFr
...
...
@@ -247,9 +244,36 @@ def mimi(mot, lang):
cased
+=
l
.
upper
()
if
cptlettre
==
m
else
l
cptlettre
+=
1
r
[
0
][
k
]
=
(
tupl
[
0
],
cased
)
return
result
def traitementzh(mot):
    """Transcribe a Mandarin word, one item per pinyin syllable.

    The pinyin of ``mot`` is read from ``word2transZh`` (field ``"p"``,
    lower-cased) when the word is listed there; otherwise it is produced by
    ``pinyin.get`` in numerical format. The pinyin string is split on single
    spaces and each syllable is handed to ``pinyin2phon``.

    Args:
        mot: The word to transcribe, as a string of characters.

    Returns:
        A list with one item per space-separated pinyin segment:
        ``[char, *pinyin2phon(segment, pinyin2api, api2class)]`` when the
        segment is longer than one character and ends with a tone digit
        (1-5), otherwise the placeholder tuple ``(char, "", [], 0)``.
        ``char`` is ``mot[i]`` for the i-th segment, or ``""`` when ``mot``
        has fewer characters than there are segments.
    """
    result = []  # expected shape: [[char, api, phonlist, tone], [char, api, phonlist, tone]...]

    if mot in word2transZh:
        pinyinOutput = word2transZh[mot]["p"].lower()
        print("Mot trouvé dans le dictionnaire :", mot, pinyinOutput)
    else:
        print("Mot non trouvé dans le dictionnaire!")
        pinyinOutput = pinyin.get(mot, format="numerical", delimiter=" ")
        print("Translittération automatique :", pinyinOutput)  # ni3 hao3

    pinparse = pinyinOutput.split(' ')  # ['ni3', 'hao3']

    for hanzindex, pintone in enumerate(pinparse):
        # The pinyin may yield more segments than mot has characters (e.g.
        # consecutive spaces produce empty segments); indexing mot blindly
        # used to raise IndexError.
        car = mot[hanzindex] if hanzindex < len(mot) else ""

        # A tone digit preceded by at least one letter means the
        # romanisation worked. The length is tested first so that an empty
        # segment never reaches pintone[-1].
        if len(pintone) > 1 and pintone[-1] in ('1', '2', '3', '4', '5'):
            res = [car]
            res.extend(pinyin2phon(pintone, pinyin2api, api2class))
            result.append(res)
        else:
            result.append((car, "", [], 0))

    print(result)
    return result
def
getEntryByWord
(
m
,
mc
,
t
,
tc
,
lang
):
# m = mot (contenu de la barre de recherche "mot"),
# mc = motCond (condition de recherche : contient, est égal à, commence par, finit par),
...
...
colorapp/views.py
View file @
b44ad433
...
...
@@ -7,8 +7,9 @@ import spacy
import
subprocess
import
re
nlpFr
=
spacy
.
load
(
'fr'
)
nlpFr
=
spacy
.
load
(
'fr
_core_news_md
'
)
nlpEn
=
spacy
.
load
(
"en_core_web_sm"
)
nlpZh
=
spacy
.
load
(
"zh_core_web_sm"
)
logFile
=
"../logs/dico_frwiktionary-20200301_v2.log"
def
redirApp
(
request
):
...
...
@@ -27,6 +28,8 @@ def colorize(request):
nlpText
=
nlpFr
(
text
)
elif
lang
==
"en"
:
nlpText
=
nlpEn
(
text
)
elif
lang
==
"zh"
:
nlpText
=
nlpZh
(
text
)
outText
=
[]
for
token
in
nlpText
:
...
...
@@ -38,22 +41,25 @@ def colorize(request):
outText
.
append
(
'§'
)
else
:
print
(
"Mot en entrée :"
,
token
.
text
)
if
lang
==
"en"
:
result
=
txtphono
.
mimi
(
token
.
text
,
lang
)
else
:
result
=
txtphono
.
mimi
(
token
.
text
,
lang
)
print
(
result
)
phonographieList
=
[]
for
r
in
result
:
phonographie
=
[]
for
i
in
r
[
0
]:
ph
=
{}
ph
[
'phon'
]
=
i
[
0
]
ph
[
'graph'
]
=
i
[
1
]
phonographie
.
append
(
ph
)
phonographieList
.
append
((
phonographie
,
r
[
1
],
r
[
2
],
r
[
3
]))
outText
.
append
(
phonographieList
)
if
lang
==
"fr"
or
lang
==
"en"
:
result
=
txtphono
.
traitement
(
token
.
text
,
lang
)
phonographieList
=
[]
for
r
in
result
:
phonographie
=
[]
for
i
in
r
[
0
]:
ph
=
{}
ph
[
'phon'
]
=
i
[
0
]
ph
[
'graph'
]
=
i
[
1
]
phonographie
.
append
(
ph
)
phonographieList
.
append
((
phonographie
,
r
[
1
],
r
[
2
],
r
[
3
]))
outText
.
append
(
phonographieList
)
elif
lang
==
"zh"
:
result
=
txtphono
.
traitementzh
(
token
.
text
)
outText
.
append
(
result
)
print
(
"Résultat en sortie :"
,
result
)
rep
=
{
'outText'
:
outText
}
...
...
coloriseur/urls.py
View file @
b44ad433
...
...
@@ -24,6 +24,7 @@ urlpatterns = [
path
(
''
,
colorapp_views
.
main
),
path
(
'en/'
,
colorapp_views
.
main
),
path
(
'fr/'
,
colorapp_views
.
main
),
path
(
'zh/'
,
colorapp_views
.
main
),
path
(
'colorize/'
,
csrf_exempt
(
colorapp_views
.
colorize
)),
path
(
'getPhonoOf/'
,
csrf_exempt
(
colorapp_views
.
getPhonoOf
)),
#path('getAllPhonographiesOf/', csrf_exempt(colorapp_views.getAllPhonographiesOf)),
...
...
static/im/btn-zi.png
0 → 100644
View file @
b44ad433
14.3 KB
static/im/zhtokenspaceblack.png
0 → 100644
View file @
b44ad433
1.39 KB
static/im/zhtokenspacewhite.png
0 → 100644
View file @
b44ad433
1.44 KB
static/scripts/main.js
View file @
b44ad433
...
...
@@ -19,12 +19,13 @@ var thisPageLang = "";
// set page target language
setLangFromUrl
()
function
setLangFromUrl
()
{
var
pageLang
=
thisURL
.
match
(
/.*
\/(
fr|en
)
/
);
var
pageLang
=
thisURL
.
match
(
/.*
\/(
fr|en
|zh
)
/
);
if
(
pageLang
)
{
console
.
log
(
"
Langue indiquée par l'url:
"
,
pageLang
[
1
]);
thisPageLang
=
pageLang
[
1
];
if
(
pageLang
[
1
]
==
"
fr
"
)
selectLang
(
"
fr
"
);
if
(
pageLang
[
1
]
==
"
en
"
)
selectLang
(
"
en
"
);
if
(
pageLang
[
1
]
==
"
en
"
)
selectLang
(
"
en
"
);
if
(
pageLang
[
1
]
==
"
zh
"
)
selectLang
(
"
zh
"
);
}
else
{
console
.
log
(
"
Chargement langue par défaut (fr)
"
);
thisPageLang
=
"
fr
"
...
...
@@ -41,9 +42,17 @@ function selectLang(lang){
document
.
getElementById
(
'
monochromeLabel
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
monochrome
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
silentWayLabel
'
).
style
.
margin
=
"
-20px 0px
"
;
document
.
getElementById
(
'
bicolor
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
bicolorLabel
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
bicolorLabel
'
).
style
.
margin
=
"
-10px 0px
"
;
document
.
getElementById
(
'
subtitle
'
).
style
.
marginTop
=
"
-20px
"
;
// Paramètres output
document
.
getElementById
(
'
ti_btnCopierColler
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
ti_btnBold
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
btnzi
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
btnwordspace
'
).
style
.
display
=
"
none
"
;
window
.
history
.
pushState
(
""
,
""
,
"
/en
"
);
}
else
if
(
lang
==
"
fr
"
){
...
...
@@ -54,10 +63,37 @@ function selectLang(lang){
document
.
getElementById
(
'
monochromeLabel
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
monochrome
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
silentWayLabel
'
).
style
.
margin
=
"
0px 0px
"
;
document
.
getElementById
(
'
bicolor
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
bicolorLabel
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
bicolorLabel
'
).
style
.
margin
=
"
0px 0px
"
;
document
.
getElementById
(
'
subtitle
'
).
style
.
marginTop
=
"
0px
"
;
// Paramètres output
document
.
getElementById
(
'
ti_btnCopierColler
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
ti_btnBold
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
btnzi
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
btnwordspace
'
).
style
.
display
=
"
none
"
;
window
.
history
.
pushState
(
""
,
""
,
"
/fr
"
);
}
else
if
(
lang
==
"
zh
"
){
interface
(
"
zh
"
);
document
.
getElementById
(
'
choixLang
'
).
value
=
'
zh
'
;
if
(
boolBold
==
false
)
toggleBold
();
document
.
getElementById
(
'
monochromeLabel
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
monochrome
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
silentWayLabel
'
).
style
.
margin
=
"
0px 0px
"
;
document
.
getElementById
(
'
bicolor
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
bicolorLabel
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
subtitle
'
).
style
.
marginTop
=
"
0px
"
;
// Paramètres output
document
.
getElementById
(
'
ti_btnCopierColler
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
ti_btnBold
'
).
style
.
display
=
"
none
"
;
document
.
getElementById
(
'
btnzi
'
).
style
.
display
=
""
;
document
.
getElementById
(
'
btnwordspace
'
).
style
.
display
=
""
;
window
.
history
.
pushState
(
""
,
""
,
"
/zh
"
);
}
}
...
...
@@ -120,117 +156,165 @@ async function getColorisation() {
console
.
log
(
data
);
// ÉCRITURE DU RÉSULTATS DANS DIV RÉSULTATS
var
outputDiv
=
document
.
getElementById
(
'
output
'
);
var
outText
=
data
[
'
outText
'
];
outputDiv
.
innerHTML
=
""
;
dicoTok
=
{};
dicoId
=
{};
var
noSpace
=
false
;
for
(
i
=
0
;
i
<
outText
.
length
;
i
++
)
{
// REMPLISSAGE DES ZONES MOTS DANS DIV RÉSULTATS
if
(
outText
[
i
][
0
][
0
]
==
[])
{
// Sécurité: si phonigraphie est une liste vide, on tente quand même d'afficher du texte en gris
console
.
log
(
"
Bug outText[
"
,
i
,
"
][0][0] is empty!
"
);
// ici trouver code pour renvoyer txt brut
}
else
if
(
outText
[
i
][
0
][
0
][
0
]
==
undefined
){
console
.
log
(
"
undefined: index
"
,
i
,
outText
[
i
][
0
][
0
])
}
else
{
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/
\n
+/
)){
for
(
h
=
0
;
h
<
outText
[
i
][
0
][
0
][
0
].
graph
.
length
;
h
++
)
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<br>
'
;
}
}
else
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^
(
,|
\.
|…|
\)
|
\]
|
\}
|%|>|»|”|-
)
$/
))
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="phon_neutre">
'
+
outText
[
i
][
0
][
0
][
0
].
graph
+
'
</span>
'
;
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^-$/
))
noSpace
=
true
;
else
noSpace
=
false
;
}
else
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/
\(
|
\[
|
\{
|<|«|“/
))
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span> </span><span class="phon_neutre">
'
+
outText
[
i
][
0
][
0
][
0
].
graph
+
'
</span>
'
;
noSpace
=
true
;
}
else
if
(
noSpace
)
{
if
(
outText
[
i
].
length
>
1
)
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="tokens" id="tok
'
+
i
+
'
" onclick="showAlignPop(this.id)"></span>
'
;
}
else
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="tokens" id="tok
'
+
i
+
'
"></span>
'
;
}
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^.+’$/
))
noSpace
=
true
;
else
noSpace
=
false
;
if
(
lang
==
"
fr
"
||
lang
==
"
en
"
){
document
.
getElementById
(
'
outputzh
'
).
innerHTML
=
""
;
var
outputDiv
=
document
.
getElementById
(
'
output
'
);
var
outText
=
data
[
'
outText
'
];
outputDiv
.
innerHTML
=
""
;
dicoTok
=
{};
dicoId
=
{};
var
noSpace
=
false
;
for
(
i
=
0
;
i
<
outText
.
length
;
i
++
)
{
// REMPLISSAGE DES ZONES MOTS DANS DIV RÉSULTATS
if
(
outText
[
i
][
0
][
0
]
==
[])
{
// Sécurité: si phonigraphie est une liste vide, on tente quand même d'afficher du texte en gris
console
.
log
(
"
Bug outText[
"
,
i
,
"
][0][0] is empty!
"
);
// ici trouver code pour renvoyer txt brut
}
else
if
(
outText
[
i
][
0
][
0
][
0
]
==
undefined
){
console
.
log
(
"
undefined: index
"
,
i
,
outText
[
i
][
0
][
0
])
}
else
{
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^’.+$/
))
{
// I'll you're
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/
\n
+/
)){
for
(
h
=
0
;
h
<
outText
[
i
][
0
][
0
][
0
].
graph
.
length
;
h
++
)
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<br>
'
;
}
}
else
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^
(
,|
\.
|…|
\)
|
\]
|
\}
|%|>|»|”|-
)
$/
))
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="phon_neutre">
'
+
outText
[
i
][
0
][
0
][
0
].
graph
+
'
</span>
'
;
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^-$/
))
noSpace
=
true
;
else
noSpace
=
false
;
}
else
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/
\(
|
\[
|
\{
|<|«|“/
))
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span> </span><span class="phon_neutre">
'
+
outText
[
i
][
0
][
0
][
0
].
graph
+
'
</span>
'
;
noSpace
=
true
;
}
else
if
(
noSpace
)
{
if
(
outText
[
i
].
length
>
1
)
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="tokens" id="tok
'
+
i
+
'
" onclick="showAlignPop(this.id)"></span>
'
;
}
else
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="tokens" id="tok
'
+
i
+
'
"></span>
'
;
}
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^.+’$/
))
noSpace
=
true
;
else
noSpace
=
false
;
}
else
{
if
(
outText
[
i
].
length
>
1
)
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span> </span><span class="tokens" id="tok
'
+
i
+
'
" onclick="showAlignPop(this.id)"></span>
'
;
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^’.+$/
))
{
// I'll you're
if
(
outText
[
i
].
length
>
1
)
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="tokens" id="tok
'
+
i
+
'
" onclick="showAlignPop(this.id)"></span>
'
;
}
else
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span class="tokens" id="tok
'
+
i
+
'
"></span>
'
;
}
}
else
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span> </span><span class="tokens" id="tok
'
+
i
+
'
"></span>
'
;
if
(
outText
[
i
].
length
>
1
)
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span> </span><span class="tokens" id="tok
'
+
i
+
'
" onclick="showAlignPop(this.id)"></span>
'
;
}
else
{
outputDiv
.
innerHTML
=
outputDiv
.
innerHTML
+
'
<span> </span><span class="tokens" id="tok
'
+
i
+
'
"></span>
'
;
}
}
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^.+’$/
))
noSpace
=
true
;
else
noSpace
=
false
;
}
if
(
outText
[
i
][
0
][
0
][
0
].
graph
.
match
(
/^.+’$/
))
noSpace
=
true
;
else
noSpace
=
false
;
}
// FORMATAGE DES SPANS
if
(
outText
[
i
][
0
][
0
][
0
].
graph
!=
'
\n
'
){
dicoTok
[
'
tok
'
+
i
]
=
[];
waitinglist
=
[];
// we will put all non aligned ones here, waiting to pushing them all at the end (so that they can't appear first)
for
(
j
=
0
;
j
<
outText
[
i
].
length
;
j
++
)
{
var
newWord
=
''
;