Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing with 1412 additions and 6780 deletions
#!/bin/bash
#SBATCH --job-name="GTSRB Full conv." # nom du job
#SBATCH --ntasks=1 # nombre de tâche (un unique processus ici)
#SBATCH --gres=gpu:1 # nombre de GPU à réserver (un unique GPU ici)
#SBATCH --cpus-per-task=10 # nombre de coeurs à réserver (un quart du noeud)
#SBATCH --hint=nomultithread # on réserve des coeurs physiques et non logiques
#SBATCH --time=03:00:00 # temps exécution maximum demande (HH:MM:SS)
#SBATCH --output="_batch/GTSRB_%j.out" # nom du fichier de sortie
#SBATCH --error="_batch/GTSRB_%j.err" # nom du fichier d'erreur (ici commun avec la sortie)
#SBATCH --mail-user=Jean-Luc.Parouty@grenoble-inp.fr
#SBATCH --mail-type=ALL
# -----------------------------------------------
# _ _ _
# | |__ __ _| |_ ___| |__
@@ -20,16 +8,45 @@
# Fidle at IDRIS
# -----------------------------------------------
#
# <!-- TITLE --> [GTS9] - Slurm batch submission
# <!-- DESC --> Bash script Slurm batch submission of GTSRB notebook
# <!-- TITLE --> [K3GTSRB11] - SLURM batch script
# <!-- DESC --> Bash script for a Slurm batch submission of an ipython code
# <!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
#
# Submission : sbatch /(...)/fidle/GTSRB/batch_slurm.sh
# Monitoring : squeue -u $USER
MODULE_ENV="tensorflow-gpu/py3/2.2.0"
RUN_DIR="$WORK/fidle/GTSRB"
RUN_SCRIPT="./run/full_convolutions.py"
# ==== Job parameters ==============================================
# ---- This is an example tested at IDRIS
# You have to adapt it to your computing environment
#SBATCH --job-name="GTSRB" # nom du job
#SBATCH --ntasks=1 # nombre de tâche (un unique processus ici)
#SBATCH --gres=gpu:1 # nombre de GPU à réserver (un unique GPU ici)
#SBATCH --cpus-per-task=10 # nombre de coeurs à réserver (un quart du noeud)
#SBATCH --hint=nomultithread # on réserve des coeurs physiques et non logiques
#SBATCH --time=01:00:00 # temps exécution maximum demande (HH:MM:SS)
#SBATCH --output="GTSRB_%j.out" # nom du fichier de sortie
#SBATCH --error="GTSRB_%j.err" # nom du fichier d'erreur (ici commun avec la sortie)
#SBATCH --mail-user=Jean-Luc.Parouty@grenoble-inp.fr
#SBATCH --mail-type=ALL
# ==== Notebook parameters =========================================
MODULE_ENV="tensorflow-gpu/py3/2.4.0"
NOTEBOOK_DIR="$WORK/fidle/GTSRB"
SCRIPT_IPY="03-Better-convolutions.py"
# ---- Environment vars used to override notebook/script parameters
#
export FIDLE_OVERRIDE_GTSRB3_run_dir="./data"
export FIDLE_OVERRIDE_GTSRB3_enhanced_dir="./run/GTSRB3"
export FIDLE_OVERRIDE_GTSRB3_model_name="model_01"
export FIDLE_OVERRIDE_GTSRB3_dataset_name="set-24x24-L"
export FIDLE_OVERRIDE_GTSRB3_batch_size=64
export FIDLE_OVERRIDE_GTSRB3_epochs=5
export FIDLE_OVERRIDE_GTSRB3_scale=1
export FIDLE_OVERRIDE_GTSRB3_fit_verbosity=0
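#
# (Sketch only: as the comment above says, these FIDLE_OVERRIDE_* variables
#  override the corresponding notebook/script parameters at start-up. Purely
#  as an illustration of the mechanism - not the actual fidle API - a plain
#  Python script could read one of them itself with:
#      import os
#      epochs = int(os.environ.get('FIDLE_OVERRIDE_GTSRB3_epochs', '5'))
#  The variable name and default are just the example values exported above.)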
# ==================================================================
echo '------------------------------------------------------------'
echo "Start : $0"
@@ -38,11 +55,12 @@ echo "Job id : $SLURM_JOB_ID"
echo "Job name : $SLURM_JOB_NAME"
echo "Job node list : $SLURM_JOB_NODELIST"
echo '------------------------------------------------------------'
echo "Script : $RUN_SCRIPT"
echo "Run in : $RUN_DIR"
echo "With env. : $MODULE_ENV"
echo "Notebook dir : $NOTEBOOK_DIR"
echo "Script : $SCRIPT_IPY"
echo "Environment : $MODULE_ENV"
echo '------------------------------------------------------------'
env | grep FIDLE_OVERRIDE | awk 'BEGIN { FS = "=" } ; { printf("%-35s : %s\n",$1,$2) }'
echo '------------------------------------------------------------'
# ---- Module
@@ -50,6 +68,9 @@ module purge
module load "$MODULE_ENV"
# ---- Run it...
#
cd "$RUN_DIR"
ipython "$RUN_SCRIPT"
cd "$NOTEBOOK_DIR"
ipython "$SCRIPT_IPY"
echo 'Done.'
\ No newline at end of file
{
"0":"tench, Tinca tinca",
"1":"goldfish, Carassius auratus",
"2":"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
"3":"tiger shark, Galeocerdo cuvieri",
"4":"hammerhead, hammerhead shark",
"5":"electric ray, crampfish, numbfish, torpedo",
"6":"stingray",
"7":"cock",
"8":"hen",
"9":"ostrich, Struthio camelus",
"10":"brambling, Fringilla montifringilla",
"11":"goldfinch, Carduelis carduelis",
"12":"house finch, linnet, Carpodacus mexicanus",
"13":"junco, snowbird",
"14":"indigo bunting, indigo finch, indigo bird, Passerina cyanea",
"15":"robin, American robin, Turdus migratorius",
"16":"bulbul",
"17":"jay",
"18":"magpie",
"19":"chickadee",
"20":"water ouzel, dipper",
"21":"kite",
"22":"bald eagle, American eagle, Haliaeetus leucocephalus",
"23":"vulture",
"24":"great grey owl, great gray owl, Strix nebulosa",
"25":"European fire salamander, Salamandra salamandra",
"26":"common newt, Triturus vulgaris",
"27":"eft",
"28":"spotted salamander, Ambystoma maculatum",
"29":"axolotl, mud puppy, Ambystoma mexicanum",
"30":"bullfrog, Rana catesbeiana",
"31":"tree frog, tree-frog",
"32":"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
"33":"loggerhead, loggerhead turtle, Caretta caretta",
"34":"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea",
"35":"mud turtle",
"36":"terrapin",
"37":"box turtle, box tortoise",
"38":"banded gecko",
"39":"common iguana, iguana, Iguana iguana",
"40":"American chameleon, anole, Anolis carolinensis",
"41":"whiptail, whiptail lizard",
"42":"agama",
"43":"frilled lizard, Chlamydosaurus kingi",
"44":"alligator lizard",
"45":"Gila monster, Heloderma suspectum",
"46":"green lizard, Lacerta viridis",
"47":"African chameleon, Chamaeleo chamaeleon",
"48":"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
"49":"African crocodile, Nile crocodile, Crocodylus niloticus",
"50":"American alligator, Alligator mississipiensis",
"51":"triceratops",
"52":"thunder snake, worm snake, Carphophis amoenus",
"53":"ringneck snake, ring-necked snake, ring snake",
"54":"hognose snake, puff adder, sand viper",
"55":"green snake, grass snake",
"56":"king snake, kingsnake",
"57":"garter snake, grass snake",
"58":"water snake",
"59":"vine snake",
"60":"night snake, Hypsiglena torquata",
"61":"boa constrictor, Constrictor constrictor",
"62":"rock python, rock snake, Python sebae",
"63":"Indian cobra, Naja naja",
"64":"green mamba",
"65":"sea snake",
"66":"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus",
"67":"diamondback, diamondback rattlesnake, Crotalus adamanteus",
"68":"sidewinder, horned rattlesnake, Crotalus cerastes",
"69":"trilobite",
"70":"harvestman, daddy longlegs, Phalangium opilio",
"71":"scorpion",
"72":"black and gold garden spider, Argiope aurantia",
"73":"barn spider, Araneus cavaticus",
"74":"garden spider, Aranea diademata",
"75":"black widow, Latrodectus mactans",
"76":"tarantula",
"77":"wolf spider, hunting spider",
"78":"tick",
"79":"centipede",
"80":"black grouse",
"81":"ptarmigan",
"82":"ruffed grouse, partridge, Bonasa umbellus",
"83":"prairie chicken, prairie grouse, prairie fowl",
"84":"peacock",
"85":"quail",
"86":"partridge",
"87":"African grey, African gray, Psittacus erithacus",
"88":"macaw",
"89":"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita",
"90":"lorikeet",
"91":"coucal",
"92":"bee eater",
"93":"hornbill",
"94":"hummingbird",
"95":"jacamar",
"96":"toucan",
"97":"drake",
"98":"red-breasted merganser, Mergus serrator",
"99":"goose",
"100":"black swan, Cygnus atratus",
"101":"tusker",
"102":"echidna, spiny anteater, anteater",
"103":"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus",
"104":"wallaby, brush kangaroo",
"105":"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus",
"106":"wombat",
"107":"jellyfish",
"108":"sea anemone, anemone",
"109":"brain coral",
"110":"flatworm, platyhelminth",
"111":"nematode, nematode worm, roundworm",
"112":"conch",
"113":"snail",
"114":"slug",
"115":"sea slug, nudibranch",
"116":"chiton, coat-of-mail shell, sea cradle, polyplacophore",
"117":"chambered nautilus, pearly nautilus, nautilus",
"118":"Dungeness crab, Cancer magister",
"119":"rock crab, Cancer irroratus",
"120":"fiddler crab",
"121":"king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica",
"122":"American lobster, Northern lobster, Maine lobster, Homarus americanus",
"123":"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish",
"124":"crayfish, crawfish, crawdad, crawdaddy",
"125":"hermit crab",
"126":"isopod",
"127":"white stork, Ciconia ciconia",
"128":"black stork, Ciconia nigra",
"129":"spoonbill",
"130":"flamingo",
"131":"little blue heron, Egretta caerulea",
"132":"American egret, great white heron, Egretta albus",
"133":"bittern",
"134":"crane",
"135":"limpkin, Aramus pictus",
"136":"European gallinule, Porphyrio porphyrio",
"137":"American coot, marsh hen, mud hen, water hen, Fulica americana",
"138":"bustard",
"139":"ruddy turnstone, Arenaria interpres",
"140":"red-backed sandpiper, dunlin, Erolia alpina",
"141":"redshank, Tringa totanus",
"142":"dowitcher",
"143":"oystercatcher, oyster catcher",
"144":"pelican",
"145":"king penguin, Aptenodytes patagonica",
"146":"albatross, mollymawk",
"147":"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus",
"148":"killer whale, killer, orca, grampus, sea wolf, Orcinus orca",
"149":"dugong, Dugong dugon",
"150":"sea lion",
"151":"Chihuahua",
"152":"Japanese spaniel",
"153":"Maltese dog, Maltese terrier, Maltese",
"154":"Pekinese, Pekingese, Peke",
"155":"Shih-Tzu",
"156":"Blenheim spaniel",
"157":"papillon",
"158":"toy terrier",
"159":"Rhodesian ridgeback",
"160":"Afghan hound, Afghan",
"161":"basset, basset hound",
"162":"beagle",
"163":"bloodhound, sleuthhound",
"164":"bluetick",
"165":"black-and-tan coonhound",
"166":"Walker hound, Walker foxhound",
"167":"English foxhound",
"168":"redbone",
"169":"borzoi, Russian wolfhound",
"170":"Irish wolfhound",
"171":"Italian greyhound",
"172":"whippet",
"173":"Ibizan hound, Ibizan Podenco",
"174":"Norwegian elkhound, elkhound",
"175":"otterhound, otter hound",
"176":"Saluki, gazelle hound",
"177":"Scottish deerhound, deerhound",
"178":"Weimaraner",
"179":"Staffordshire bullterrier, Staffordshire bull terrier",
"180":"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier",
"181":"Bedlington terrier",
"182":"Border terrier",
"183":"Kerry blue terrier",
"184":"Irish terrier",
"185":"Norfolk terrier",
"186":"Norwich terrier",
"187":"Yorkshire terrier",
"188":"wire-haired fox terrier",
"189":"Lakeland terrier",
"190":"Sealyham terrier, Sealyham",
"191":"Airedale, Airedale terrier",
"192":"cairn, cairn terrier",
"193":"Australian terrier",
"194":"Dandie Dinmont, Dandie Dinmont terrier",
"195":"Boston bull, Boston terrier",
"196":"miniature schnauzer",
"197":"giant schnauzer",
"198":"standard schnauzer",
"199":"Scotch terrier, Scottish terrier, Scottie",
"200":"Tibetan terrier, chrysanthemum dog",
"201":"silky terrier, Sydney silky",
"202":"soft-coated wheaten terrier",
"203":"West Highland white terrier",
"204":"Lhasa, Lhasa apso",
"205":"flat-coated retriever",
"206":"curly-coated retriever",
"207":"golden retriever",
"208":"Labrador retriever",
"209":"Chesapeake Bay retriever",
"210":"German short-haired pointer",
"211":"vizsla, Hungarian pointer",
"212":"English setter",
"213":"Irish setter, red setter",
"214":"Gordon setter",
"215":"Brittany spaniel",
"216":"clumber, clumber spaniel",
"217":"English springer, English springer spaniel",
"218":"Welsh springer spaniel",
"219":"cocker spaniel, English cocker spaniel, cocker",
"220":"Sussex spaniel",
"221":"Irish water spaniel",
"222":"kuvasz",
"223":"schipperke",
"224":"groenendael",
"225":"malinois",
"226":"briard",
"227":"kelpie",
"228":"komondor",
"229":"Old English sheepdog, bobtail",
"230":"Shetland sheepdog, Shetland sheep dog, Shetland",
"231":"collie",
"232":"Border collie",
"233":"Bouvier des Flandres, Bouviers des Flandres",
"234":"Rottweiler",
"235":"German shepherd, German shepherd dog, German police dog, alsatian",
"236":"Doberman, Doberman pinscher",
"237":"miniature pinscher",
"238":"Greater Swiss Mountain dog",
"239":"Bernese mountain dog",
"240":"Appenzeller",
"241":"EntleBucher",
"242":"boxer",
"243":"bull mastiff",
"244":"Tibetan mastiff",
"245":"French bulldog",
"246":"Great Dane",
"247":"Saint Bernard, St Bernard",
"248":"Eskimo dog, husky",
"249":"malamute, malemute, Alaskan malamute",
"250":"Siberian husky",
"251":"dalmatian, coach dog, carriage dog",
"252":"affenpinscher, monkey pinscher, monkey dog",
"253":"basenji",
"254":"pug, pug-dog",
"255":"Leonberg",
"256":"Newfoundland, Newfoundland dog",
"257":"Great Pyrenees",
"258":"Samoyed, Samoyede",
"259":"Pomeranian",
"260":"chow, chow chow",
"261":"keeshond",
"262":"Brabancon griffon",
"263":"Pembroke, Pembroke Welsh corgi",
"264":"Cardigan, Cardigan Welsh corgi",
"265":"toy poodle",
"266":"miniature poodle",
"267":"standard poodle",
"268":"Mexican hairless",
"269":"timber wolf, grey wolf, gray wolf, Canis lupus",
"270":"white wolf, Arctic wolf, Canis lupus tundrarum",
"271":"red wolf, maned wolf, Canis rufus, Canis niger",
"272":"coyote, prairie wolf, brush wolf, Canis latrans",
"273":"dingo, warrigal, warragal, Canis dingo",
"274":"dhole, Cuon alpinus",
"275":"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus",
"276":"hyena, hyaena",
"277":"red fox, Vulpes vulpes",
"278":"kit fox, Vulpes macrotis",
"279":"Arctic fox, white fox, Alopex lagopus",
"280":"grey fox, gray fox, Urocyon cinereoargenteus",
"281":"tabby, tabby cat",
"282":"tiger cat",
"283":"Persian cat",
"284":"Siamese cat, Siamese",
"285":"Egyptian cat",
"286":"cougar, puma, catamount, mountain lion, painter, panther, Felis concolor",
"287":"lynx, catamount",
"288":"leopard, Panthera pardus",
"289":"snow leopard, ounce, Panthera uncia",
"290":"jaguar, panther, Panthera onca, Felis onca",
"291":"lion, king of beasts, Panthera leo",
"292":"tiger, Panthera tigris",
"293":"cheetah, chetah, Acinonyx jubatus",
"294":"brown bear, bruin, Ursus arctos",
"295":"American black bear, black bear, Ursus americanus, Euarctos americanus",
"296":"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus",
"297":"sloth bear, Melursus ursinus, Ursus ursinus",
"298":"mongoose",
"299":"meerkat, mierkat",
"300":"tiger beetle",
"301":"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle",
"302":"ground beetle, carabid beetle",
"303":"long-horned beetle, longicorn, longicorn beetle",
"304":"leaf beetle, chrysomelid",
"305":"dung beetle",
"306":"rhinoceros beetle",
"307":"weevil",
"308":"fly",
"309":"bee",
"310":"ant, emmet, pismire",
"311":"grasshopper, hopper",
"312":"cricket",
"313":"walking stick, walkingstick, stick insect",
"314":"cockroach, roach",
"315":"mantis, mantid",
"316":"cicada, cicala",
"317":"leafhopper",
"318":"lacewing, lacewing fly",
"319":"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
"320":"damselfly",
"321":"admiral",
"322":"ringlet, ringlet butterfly",
"323":"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus",
"324":"cabbage butterfly",
"325":"sulphur butterfly, sulfur butterfly",
"326":"lycaenid, lycaenid butterfly",
"327":"starfish, sea star",
"328":"sea urchin",
"329":"sea cucumber, holothurian",
"330":"wood rabbit, cottontail, cottontail rabbit",
"331":"hare",
"332":"Angora, Angora rabbit",
"333":"hamster",
"334":"porcupine, hedgehog",
"335":"fox squirrel, eastern fox squirrel, Sciurus niger",
"336":"marmot",
"337":"beaver",
"338":"guinea pig, Cavia cobaya",
"339":"sorrel",
"340":"zebra",
"341":"hog, pig, grunter, squealer, Sus scrofa",
"342":"wild boar, boar, Sus scrofa",
"343":"warthog",
"344":"hippopotamus, hippo, river horse, Hippopotamus amphibius",
"345":"ox",
"346":"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis",
"347":"bison",
"348":"ram, tup",
"349":"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis",
"350":"ibex, Capra ibex",
"351":"hartebeest",
"352":"impala, Aepyceros melampus",
"353":"gazelle",
"354":"Arabian camel, dromedary, Camelus dromedarius",
"355":"llama",
"356":"weasel",
"357":"mink",
"358":"polecat, fitch, foulmart, foumart, Mustela putorius",
"359":"black-footed ferret, ferret, Mustela nigripes",
"360":"otter",
"361":"skunk, polecat, wood pussy",
"362":"badger",
"363":"armadillo",
"364":"three-toed sloth, ai, Bradypus tridactylus",
"365":"orangutan, orang, orangutang, Pongo pygmaeus",
"366":"gorilla, Gorilla gorilla",
"367":"chimpanzee, chimp, Pan troglodytes",
"368":"gibbon, Hylobates lar",
"369":"siamang, Hylobates syndactylus, Symphalangus syndactylus",
"370":"guenon, guenon monkey",
"371":"patas, hussar monkey, Erythrocebus patas",
"372":"baboon",
"373":"macaque",
"374":"langur",
"375":"colobus, colobus monkey",
"376":"proboscis monkey, Nasalis larvatus",
"377":"marmoset",
"378":"capuchin, ringtail, Cebus capucinus",
"379":"howler monkey, howler",
"380":"titi, titi monkey",
"381":"spider monkey, Ateles geoffroyi",
"382":"squirrel monkey, Saimiri sciureus",
"383":"Madagascar cat, ring-tailed lemur, Lemur catta",
"384":"indri, indris, Indri indri, Indri brevicaudatus",
"385":"Indian elephant, Elephas maximus",
"386":"African elephant, Loxodonta africana",
"387":"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens",
"388":"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca",
"389":"barracouta, snoek",
"390":"eel",
"391":"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch",
"392":"rock beauty, Holocanthus tricolor",
"393":"anemone fish",
"394":"sturgeon",
"395":"gar, garfish, garpike, billfish, Lepisosteus osseus",
"396":"lionfish",
"397":"puffer, pufferfish, blowfish, globefish",
"398":"abacus",
"399":"abaya",
"400":"academic gown, academic robe, judge's robe",
"401":"accordion, piano accordion, squeeze box",
"402":"acoustic guitar",
"403":"aircraft carrier, carrier, flattop, attack aircraft carrier",
"404":"airliner",
"405":"airship, dirigible",
"406":"altar",
"407":"ambulance",
"408":"amphibian, amphibious vehicle",
"409":"analog clock",
"410":"apiary, bee house",
"411":"apron",
"412":"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin",
"413":"assault rifle, assault gun",
"414":"backpack, back pack, knapsack, packsack, rucksack, haversack",
"415":"bakery, bakeshop, bakehouse",
"416":"balance beam, beam",
"417":"balloon",
"418":"ballpoint, ballpoint pen, ballpen, Biro",
"419":"Band Aid",
"420":"banjo",
"421":"bannister, banister, balustrade, balusters, handrail",
"422":"barbell",
"423":"barber chair",
"424":"barbershop",
"425":"barn",
"426":"barometer",
"427":"barrel, cask",
"428":"barrow, garden cart, lawn cart, wheelbarrow",
"429":"baseball",
"430":"basketball",
"431":"bassinet",
"432":"bassoon",
"433":"bathing cap, swimming cap",
"434":"bath towel",
"435":"bathtub, bathing tub, bath, tub",
"436":"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon",
"437":"beacon, lighthouse, beacon light, pharos",
"438":"beaker",
"439":"bearskin, busby, shako",
"440":"beer bottle",
"441":"beer glass",
"442":"bell cote, bell cot",
"443":"bib",
"444":"bicycle-built-for-two, tandem bicycle, tandem",
"445":"bikini, two-piece",
"446":"binder, ring-binder",
"447":"binoculars, field glasses, opera glasses",
"448":"birdhouse",
"449":"boathouse",
"450":"bobsled, bobsleigh, bob",
"451":"bolo tie, bolo, bola tie, bola",
"452":"bonnet, poke bonnet",
"453":"bookcase",
"454":"bookshop, bookstore, bookstall",
"455":"bottlecap",
"456":"bow",
"457":"bow tie, bow-tie, bowtie",
"458":"brass, memorial tablet, plaque",
"459":"brassiere, bra, bandeau",
"460":"breakwater, groin, groyne, mole, bulwark, seawall, jetty",
"461":"breastplate, aegis, egis",
"462":"broom",
"463":"bucket, pail",
"464":"buckle",
"465":"bulletproof vest",
"466":"bullet train, bullet",
"467":"butcher shop, meat market",
"468":"cab, hack, taxi, taxicab",
"469":"caldron, cauldron",
"470":"candle, taper, wax light",
"471":"cannon",
"472":"canoe",
"473":"can opener, tin opener",
"474":"cardigan",
"475":"car mirror",
"476":"carousel, carrousel, merry-go-round, roundabout, whirligig",
"477":"carpenter's kit, tool kit",
"478":"carton",
"479":"car wheel",
"480":"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM",
"481":"cassette",
"482":"cassette player",
"483":"castle",
"484":"catamaran",
"485":"CD player",
"486":"cello, violoncello",
"487":"cellular telephone, cellular phone, cellphone, cell, mobile phone",
"488":"chain",
"489":"chainlink fence",
"490":"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour",
"491":"chain saw, chainsaw",
"492":"chest",
"493":"chiffonier, commode",
"494":"chime, bell, gong",
"495":"china cabinet, china closet",
"496":"Christmas stocking",
"497":"church, church building",
"498":"cinema, movie theater, movie theatre, movie house, picture palace",
"499":"cleaver, meat cleaver, chopper",
"500":"cliff dwelling",
"501":"cloak",
"502":"clog, geta, patten, sabot",
"503":"cocktail shaker",
"504":"coffee mug",
"505":"coffeepot",
"506":"coil, spiral, volute, whorl, helix",
"507":"combination lock",
"508":"computer keyboard, keypad",
"509":"confectionery, confectionary, candy store",
"510":"container ship, containership, container vessel",
"511":"convertible",
"512":"corkscrew, bottle screw",
"513":"cornet, horn, trumpet, trump",
"514":"cowboy boot",
"515":"cowboy hat, ten-gallon hat",
"516":"cradle",
"517":"crane",
"518":"crash helmet",
"519":"crate",
"520":"crib, cot",
"521":"Crock Pot",
"522":"croquet ball",
"523":"crutch",
"524":"cuirass",
"525":"dam, dike, dyke",
"526":"desk",
"527":"desktop computer",
"528":"dial telephone, dial phone",
"529":"diaper, nappy, napkin",
"530":"digital clock",
"531":"digital watch",
"532":"dining table, board",
"533":"dishrag, dishcloth",
"534":"dishwasher, dish washer, dishwashing machine",
"535":"disk brake, disc brake",
"536":"dock, dockage, docking facility",
"537":"dogsled, dog sled, dog sleigh",
"538":"dome",
"539":"doormat, welcome mat",
"540":"drilling platform, offshore rig",
"541":"drum, membranophone, tympan",
"542":"drumstick",
"543":"dumbbell",
"544":"Dutch oven",
"545":"electric fan, blower",
"546":"electric guitar",
"547":"electric locomotive",
"548":"entertainment center",
"549":"envelope",
"550":"espresso maker",
"551":"face powder",
"552":"feather boa, boa",
"553":"file, file cabinet, filing cabinet",
"554":"fireboat",
"555":"fire engine, fire truck",
"556":"fire screen, fireguard",
"557":"flagpole, flagstaff",
"558":"flute, transverse flute",
"559":"folding chair",
"560":"football helmet",
"561":"forklift",
"562":"fountain",
"563":"fountain pen",
"564":"four-poster",
"565":"freight car",
"566":"French horn, horn",
"567":"frying pan, frypan, skillet",
"568":"fur coat",
"569":"garbage truck, dustcart",
"570":"gasmask, respirator, gas helmet",
"571":"gas pump, gasoline pump, petrol pump, island dispenser",
"572":"goblet",
"573":"go-kart",
"574":"golf ball",
"575":"golfcart, golf cart",
"576":"gondola",
"577":"gong, tam-tam",
"578":"gown",
"579":"grand piano, grand",
"580":"greenhouse, nursery, glasshouse",
"581":"grille, radiator grille",
"582":"grocery store, grocery, food market, market",
"583":"guillotine",
"584":"hair slide",
"585":"hair spray",
"586":"half track",
"587":"hammer",
"588":"hamper",
"589":"hand blower, blow dryer, blow drier, hair dryer, hair drier",
"590":"hand-held computer, hand-held microcomputer",
"591":"handkerchief, hankie, hanky, hankey",
"592":"hard disc, hard disk, fixed disk",
"593":"harmonica, mouth organ, harp, mouth harp",
"594":"harp",
"595":"harvester, reaper",
"596":"hatchet",
"597":"holster",
"598":"home theater, home theatre",
"599":"honeycomb",
"600":"hook, claw",
"601":"hoopskirt, crinoline",
"602":"horizontal bar, high bar",
"603":"horse cart, horse-cart",
"604":"hourglass",
"605":"iPod",
"606":"iron, smoothing iron",
"607":"jack-o'-lantern",
"608":"jean, blue jean, denim",
"609":"jeep, landrover",
"610":"jersey, T-shirt, tee shirt",
"611":"jigsaw puzzle",
"612":"jinrikisha, ricksha, rickshaw",
"613":"joystick",
"614":"kimono",
"615":"knee pad",
"616":"knot",
"617":"lab coat, laboratory coat",
"618":"ladle",
"619":"lampshade, lamp shade",
"620":"laptop, laptop computer",
"621":"lawn mower, mower",
"622":"lens cap, lens cover",
"623":"letter opener, paper knife, paperknife",
"624":"library",
"625":"lifeboat",
"626":"lighter, light, igniter, ignitor",
"627":"limousine, limo",
"628":"liner, ocean liner",
"629":"lipstick, lip rouge",
"630":"Loafer",
"631":"lotion",
"632":"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system",
"633":"loupe, jeweler's loupe",
"634":"lumbermill, sawmill",
"635":"magnetic compass",
"636":"mailbag, postbag",
"637":"mailbox, letter box",
"638":"maillot",
"639":"maillot, tank suit",
"640":"manhole cover",
"641":"maraca",
"642":"marimba, xylophone",
"643":"mask",
"644":"matchstick",
"645":"maypole",
"646":"maze, labyrinth",
"647":"measuring cup",
"648":"medicine chest, medicine cabinet",
"649":"megalith, megalithic structure",
"650":"microphone, mike",
"651":"microwave, microwave oven",
"652":"military uniform",
"653":"milk can",
"654":"minibus",
"655":"miniskirt, mini",
"656":"minivan",
"657":"missile",
"658":"mitten",
"659":"mixing bowl",
"660":"mobile home, manufactured home",
"661":"Model T",
"662":"modem",
"663":"monastery",
"664":"monitor",
"665":"moped",
"666":"mortar",
"667":"mortarboard",
"668":"mosque",
"669":"mosquito net",
"670":"motor scooter, scooter",
"671":"mountain bike, all-terrain bike, off-roader",
"672":"mountain tent",
"673":"mouse, computer mouse",
"674":"mousetrap",
"675":"moving van",
"676":"muzzle",
"677":"nail",
"678":"neck brace",
"679":"necklace",
"680":"nipple",
"681":"notebook, notebook computer",
"682":"obelisk",
"683":"oboe, hautboy, hautbois",
"684":"ocarina, sweet potato",
"685":"odometer, hodometer, mileometer, milometer",
"686":"oil filter",
"687":"organ, pipe organ",
"688":"oscilloscope, scope, cathode-ray oscilloscope, CRO",
"689":"overskirt",
"690":"oxcart",
"691":"oxygen mask",
"692":"packet",
"693":"paddle, boat paddle",
"694":"paddlewheel, paddle wheel",
"695":"padlock",
"696":"paintbrush",
"697":"pajama, pyjama, pj's, jammies",
"698":"palace",
"699":"panpipe, pandean pipe, syrinx",
"700":"paper towel",
"701":"parachute, chute",
"702":"parallel bars, bars",
"703":"park bench",
"704":"parking meter",
"705":"passenger car, coach, carriage",
"706":"patio, terrace",
"707":"pay-phone, pay-station",
"708":"pedestal, plinth, footstall",
"709":"pencil box, pencil case",
"710":"pencil sharpener",
"711":"perfume, essence",
"712":"Petri dish",
"713":"photocopier",
"714":"pick, plectrum, plectron",
"715":"pickelhaube",
"716":"picket fence, paling",
"717":"pickup, pickup truck",
"718":"pier",
"719":"piggy bank, penny bank",
"720":"pill bottle",
"721":"pillow",
"722":"ping-pong ball",
"723":"pinwheel",
"724":"pirate, pirate ship",
"725":"pitcher, ewer",
"726":"plane, carpenter's plane, woodworking plane",
"727":"planetarium",
"728":"plastic bag",
"729":"plate rack",
"730":"plow, plough",
"731":"plunger, plumber's helper",
"732":"Polaroid camera, Polaroid Land camera",
"733":"pole",
"734":"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria",
"735":"poncho",
"736":"pool table, billiard table, snooker table",
"737":"pop bottle, soda bottle",
"738":"pot, flowerpot",
"739":"potter's wheel",
"740":"power drill",
"741":"prayer rug, prayer mat",
"742":"printer",
"743":"prison, prison house",
"744":"projectile, missile",
"745":"projector",
"746":"puck, hockey puck",
"747":"punching bag, punch bag, punching ball, punchball",
"748":"purse",
"749":"quill, quill pen",
"750":"quilt, comforter, comfort, puff",
"751":"racer, race car, racing car",
"752":"racket, racquet",
"753":"radiator",
"754":"radio, wireless",
"755":"radio telescope, radio reflector",
"756":"rain barrel",
"757":"recreational vehicle, RV, R.V.",
"758":"reel",
"759":"reflex camera",
"760":"refrigerator, icebox",
"761":"remote control, remote",
"762":"restaurant, eating house, eating place, eatery",
"763":"revolver, six-gun, six-shooter",
"764":"rifle",
"765":"rocking chair, rocker",
"766":"rotisserie",
"767":"rubber eraser, rubber, pencil eraser",
"768":"rugby ball",
"769":"rule, ruler",
"770":"running shoe",
"771":"safe",
"772":"safety pin",
"773":"saltshaker, salt shaker",
"774":"sandal",
"775":"sarong",
"776":"sax, saxophone",
"777":"scabbard",
"778":"scale, weighing machine",
"779":"school bus",
"780":"schooner",
"781":"scoreboard",
"782":"screen, CRT screen",
"783":"screw",
"784":"screwdriver",
"785":"seat belt, seatbelt",
"786":"sewing machine",
"787":"shield, buckler",
"788":"shoe shop, shoe-shop, shoe store",
"789":"shoji",
"790":"shopping basket",
"791":"shopping cart",
"792":"shovel",
"793":"shower cap",
"794":"shower curtain",
"795":"ski",
"796":"ski mask",
"797":"sleeping bag",
"798":"slide rule, slipstick",
"799":"sliding door",
"800":"slot, one-armed bandit",
"801":"snorkel",
"802":"snowmobile",
"803":"snowplow, snowplough",
"804":"soap dispenser",
"805":"soccer ball",
"806":"sock",
"807":"solar dish, solar collector, solar furnace",
"808":"sombrero",
"809":"soup bowl",
"810":"space bar",
"811":"space heater",
"812":"space shuttle",
"813":"spatula",
"814":"speedboat",
"815":"spider web, spider's web",
"816":"spindle",
"817":"sports car, sport car",
"818":"spotlight, spot",
"819":"stage",
"820":"steam locomotive",
"821":"steel arch bridge",
"822":"steel drum",
"823":"stethoscope",
"824":"stole",
"825":"stone wall",
"826":"stopwatch, stop watch",
"827":"stove",
"828":"strainer",
"829":"streetcar, tram, tramcar, trolley, trolley car",
"830":"stretcher",
"831":"studio couch, day bed",
"832":"stupa, tope",
"833":"submarine, pigboat, sub, U-boat",
"834":"suit, suit of clothes",
"835":"sundial",
"836":"sunglass",
"837":"sunglasses, dark glasses, shades",
"838":"sunscreen, sunblock, sun blocker",
"839":"suspension bridge",
"840":"swab, swob, mop",
"841":"sweatshirt",
"842":"swimming trunks, bathing trunks",
"843":"swing",
"844":"switch, electric switch, electrical switch",
"845":"syringe",
"846":"table lamp",
"847":"tank, army tank, armored combat vehicle, armoured combat vehicle",
"848":"tape player",
"849":"teapot",
"850":"teddy, teddy bear",
"851":"television, television system",
"852":"tennis ball",
"853":"thatch, thatched roof",
"854":"theater curtain, theatre curtain",
"855":"thimble",
"856":"thresher, thrasher, threshing machine",
"857":"throne",
"858":"tile roof",
"859":"toaster",
"860":"tobacco shop, tobacconist shop, tobacconist",
"861":"toilet seat",
"862":"torch",
"863":"totem pole",
"864":"tow truck, tow car, wrecker",
"865":"toyshop",
"866":"tractor",
"867":"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi",
"868":"tray",
"869":"trench coat",
"870":"tricycle, trike, velocipede",
"871":"trimaran",
"872":"tripod",
"873":"triumphal arch",
"874":"trolleybus, trolley coach, trackless trolley",
"875":"trombone",
"876":"tub, vat",
"877":"turnstile",
"878":"typewriter keyboard",
"879":"umbrella",
"880":"unicycle, monocycle",
"881":"upright, upright piano",
"882":"vacuum, vacuum cleaner",
"883":"vase",
"884":"vault",
"885":"velvet",
"886":"vending machine",
"887":"vestment",
"888":"viaduct",
"889":"violin, fiddle",
"890":"volleyball",
"891":"waffle iron",
"892":"wall clock",
"893":"wallet, billfold, notecase, pocketbook",
"894":"wardrobe, closet, press",
"895":"warplane, military plane",
"896":"washbasin, handbasin, washbowl, lavabo, wash-hand basin",
"897":"washer, automatic washer, washing machine",
"898":"water bottle",
"899":"water jug",
"900":"water tower",
"901":"whiskey jug",
"902":"whistle",
"903":"wig",
"904":"window screen",
"905":"window shade",
"906":"Windsor tie",
"907":"wine bottle",
"908":"wing",
"909":"wok",
"910":"wooden spoon",
"911":"wool, woolen, woollen",
"912":"worm fence, snake fence, snake-rail fence, Virginia fence",
"913":"wreck",
"914":"yawl",
"915":"yurt",
"916":"web site, website, internet site, site",
"917":"comic book",
"918":"crossword puzzle, crossword",
"919":"street sign",
"920":"traffic light, traffic signal, stoplight",
"921":"book jacket, dust cover, dust jacket, dust wrapper",
"922":"menu",
"923":"plate",
"924":"guacamole",
"925":"consomme",
"926":"hot pot, hotpot",
"927":"trifle",
"928":"ice cream, icecream",
"929":"ice lolly, lolly, lollipop, popsicle",
"930":"French loaf",
"931":"bagel, beigel",
"932":"pretzel",
"933":"cheeseburger",
"934":"hotdog, hot dog, red hot",
"935":"mashed potato",
"936":"head cabbage",
"937":"broccoli",
"938":"cauliflower",
"939":"zucchini, courgette",
"940":"spaghetti squash",
"941":"acorn squash",
"942":"butternut squash",
"943":"cucumber, cuke",
"944":"artichoke, globe artichoke",
"945":"bell pepper",
"946":"cardoon",
"947":"mushroom",
"948":"Granny Smith",
"949":"strawberry",
"950":"orange",
"951":"lemon",
"952":"fig",
"953":"pineapple, ananas",
"954":"banana",
"955":"jackfruit, jak, jack",
"956":"custard apple",
"957":"pomegranate",
"958":"hay",
"959":"carbonara",
"960":"chocolate sauce, chocolate syrup",
"961":"dough",
"962":"meat loaf, meatloaf",
"963":"pizza, pizza pie",
"964":"potpie",
"965":"burrito",
"966":"red wine",
"967":"espresso",
"968":"cup",
"969":"eggnog",
"970":"alp",
"971":"bubble",
"972":"cliff, drop, drop-off",
"973":"coral reef",
"974":"geyser",
"975":"lakeside, lakeshore",
"976":"promontory, headland, head, foreland",
"977":"sandbar, sand bar",
"978":"seashore, coast, seacoast, sea-coast",
"979":"valley, vale",
"980":"volcano",
"981":"ballplayer, baseball player",
"982":"groom, bridegroom",
"983":"scuba diver",
"984":"rapeseed",
"985":"daisy",
"986":"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
"987":"corn",
"988":"acorn",
"989":"hip, rose hip, rosehip",
"990":"buckeye, horse chestnut, conker",
"991":"coral fungus",
"992":"agaric",
"993":"gyromitra",
"994":"stinkhorn, carrion fungus",
"995":"earthstar",
"996":"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa",
"997":"bolete",
"998":"ear, spike, capitulum",
"999":"toilet tissue, toilet paper, bathroom tissue"
}
\ No newline at end of file
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| Imagenet Classes
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE) - CNRS/MIAI/UGA
# ------------------------------------------------------------------
# JL Parouty 2024
import os
import json


class ImagenetClassnames:

    classes_file = 'ImagenetClassnames.json'

    def __init__(self):
        path     = os.path.abspath(__file__)
        dir_path = os.path.dirname(path)
        with open(f'{dir_path}/{self.classes_file}') as f:
            self.classes = json.load(f)
        print(f'Imagenet classes loaded ({len(self.classes)} classes)')

    def get(self, classes_id, top_n=2):
        top_classes = [self.classes[str(i)] for i in classes_id[-top_n:]]
        return top_classes
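
# Illustrative use (assumed calling context, not from the original file):
# `classes_id` is expected to hold class indices sorted by increasing probability
# (e.g. np.argsort(prediction)), so the last `top_n` entries are the most likely:
#   top = ImagenetClassnames().get(np.argsort(prediction), top_n=3)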
\ No newline at end of file
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| Tensorboard callback
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE) - CNRS/MIAI/UGA
# ------------------------------------------------------------------
# JL Parouty 2023
#
# See : https://keras.io/api/callbacks/
# See : https://keras.io/guides/writing_your_own_callbacks/
# See : https://pytorch.org/docs/stable/tensorboard.html
import keras
from torch.utils.tensorboard import SummaryWriter


class TensorboardCallback(keras.callbacks.Callback):

    def __init__(self, log_dir=None):
        '''
        Init callback
        Args:
            log_dir : log directory
        '''
        super().__init__()
        self.writer = SummaryWriter(log_dir=log_dir)

    def on_epoch_end(self, epoch, logs=None):
        '''
        Record logs at epoch end
        '''
        # ---- Record all metrics (very simply)
        #
        # for k,v in logs.items():
        #     self.writer.add_scalar(k, v, epoch)

        # ---- Record and group specific metrics
        #
        self.writer.add_scalars('Accuracy',
                                {'Train':      logs['accuracy'],
                                 'Validation': logs['val_accuracy']},
                                epoch)
        self.writer.add_scalars('Loss',
                                {'Train':      logs['loss'],
                                 'Validation': logs['val_loss']},
                                epoch)
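
# Illustrative use (assumed training call, not from the original file):
#   model.fit(x_train, y_train, epochs=5,
#             validation_data=(x_test, y_test),
#             callbacks=[ TensorboardCallback(log_dir='./run/logs') ])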
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| Dataset reader
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE) - CNRS/MIAI/UGA
# ------------------------------------------------------------------
# JL Parouty 2023
import h5py
import os
import fidle


def read_dataset(enhanced_dir, dataset_name, scale=1):
    '''
    Reads h5 dataset
    Args:
        enhanced_dir : directory of the enhanced datasets
        dataset_name : dataset name, without .h5
        scale        : fraction of the dataset to keep (1 means all)
    Returns:
        x_train,y_train, x_test,y_test, x_meta,y_meta
    '''
    # ---- Read dataset
    #
    chrono = fidle.Chrono()
    chrono.start()
    filename = f'{enhanced_dir}/{dataset_name}.h5'
    with h5py.File(filename,'r') as f:
        x_train = f['x_train'][:]
        y_train = f['y_train'][:]
        x_test  = f['x_test'][:]
        y_test  = f['y_test'][:]
        x_meta  = f['x_meta'][:]
        y_meta  = f['y_meta'][:]

    # ---- Rescale
    #
    print('Original shape :', x_train.shape, y_train.shape)
    x_train,y_train, x_test,y_test = fidle.utils.rescale_dataset(x_train,y_train, x_test,y_test, scale=scale)
    print('Rescaled shape :', x_train.shape, y_train.shape)

    # ---- Shuffle
    #
    x_train,y_train = fidle.utils.shuffle_np_dataset(x_train, y_train)

    # ---- Done
    #
    duration = chrono.get_delay()
    size     = fidle.utils.hsize(os.path.getsize(filename))
    print(f'\nDataset "{dataset_name}" is loaded and shuffled. ({size} in {duration})')
    return x_train,y_train, x_test,y_test, x_meta,y_meta
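
# Illustrative use (parameter values borrowed from the batch example above):
#   x_train,y_train, x_test,y_test, x_meta,y_meta = read_dataset('./run/GTSRB3', 'set-24x24-L', scale=1)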
print('Module my_loader loaded.')
\ No newline at end of file
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| Some nice models
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE) - CNRS/MIAI/UGA
# ------------------------------------------------------------------
# JL Parouty 2023
import keras
# ------------------------------------------------------------------
# -- A simple model, for 24x24 or 48x48 images --
# ------------------------------------------------------------------
#
def get_model_01(lx,ly,lz):
    model = keras.models.Sequential()
    model.add( keras.layers.Input((lx,ly,lz)) )
    model.add( keras.layers.Conv2D(96, (3,3), activation='relu' ))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.2))
    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.2))
    model.add( keras.layers.Flatten())
    model.add( keras.layers.Dense(1500, activation='relu'))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Dense(43, activation='softmax'))
    return model
# ------------------------------------------------------------------
# -- A more sophisticated model, for 48x48 images --
# ------------------------------------------------------------------
#
def get_model_02(lx,ly,lz):
    model = keras.models.Sequential()
    model.add( keras.layers.Input((lx,ly,lz)) )
    model.add( keras.layers.Conv2D(32, (3,3), activation='relu'))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Flatten())
    model.add( keras.layers.Dense(1152, activation='relu'))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Dense(43, activation='softmax'))
    return model
def get_model(name, lx,ly,lz):
    '''
    Return a model given by name
    Args:
        name     : model name ('model_01' or 'model_02') used to retrieve the builder
        lx,ly,lz : input shape
    Returns:
        model
    '''
    if name=='model_01' : return get_model_01(lx,ly,lz)
    if name=='model_02' : return get_model_02(lx,ly,lz)
    print('*** Model not found : ', name)
    return None
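
# Illustrative use (shape values are only an example):
#   model = get_model('model_01', 24, 24, 3)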
# A more fun version ;-)
def get_model2(name, lx,ly,lz):
    # Retrieve the builder function from the module globals,
    # e.g. name='model_01' -> get_model_01
    get_model = globals()['get_'+name]
    model     = get_model(lx,ly,lz)
    return model
print('Module my_models loaded.')
\ No newline at end of file
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| A small traffic sign classifier
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE) - CNRS/MIAI/UGA
# ------------------------------------------------------------------
# JL Parouty 2023
import numpy as np
import matplotlib.pyplot as plt
import fidle


def show_prediction( prediction, x, y, x_meta ):

    # ---- A prediction is just the output layer
    #
    fidle.utils.subtitle("Output layer from model is (x100) :")
    with np.printoptions(precision=2, suppress=True, linewidth=95):
        print(prediction*100)

    # ---- Graphic visualisation
    #
    fidle.utils.subtitle("Graphically :")
    plt.figure(figsize=(8,2))
    plt.bar(range(43), prediction[0], align='center', alpha=0.5)
    plt.ylabel('Probability')
    plt.ylim((0,1))
    plt.xlabel('Class')
    plt.title('Traffic sign prediction')
    fidle.scrawler.save_fig('05-prediction-proba')
    plt.show()

    # ---- Predicted class
    #
    p = np.argmax(prediction)

    # ---- Show result
    #
    fidle.utils.subtitle('In pictures :')
    print("\nThe image : Prediction : Real stuff:")
    fidle.scrawler.images([x, x_meta[p], x_meta[y]], [p,p,y], range(3), columns=3, x_size=1.5, y_size=1.5, save_as='06-prediction-images')

    if p == y:
        print("YEEES! That's right!")
    else:
        print("Oops, that's wrong ;-(")
\ No newline at end of file
Source diff could not be displayed for 4 files: they are too large.
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [GTS5] - CNN with GTSRB dataset - Full convolutions
<!-- DESC --> Episode 5 : A lot of models, a lot of datasets and a lot of results.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Try multiple solutions
- Design a generic and batch-usable code
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
The final aim is to recognise them!
A description is available here: http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
## What we're going to do :
Our main steps:
- Try n models with n datasets
- Save a Pandas/h5 report
- Write to be run in batch mode
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
import tensorflow as tf
from tensorflow import keras
import numpy as np
import h5py
import sys,os,time,json
import random
from IPython.display import display
sys.path.append('..')
import fidle.pwk as pwk
VERSION='1.6'
datasets_dir = pwk.init('GTS5')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : GTS5
Run time : Thursday 17 December 2020, 22:07:09
TensorFlow version : 2.1.0
Keras version : 2.2.4-tf
Datasets dir : /gpfswork/rech/mlh/uja62cb/datasets
Running mode : full
Update keras cache : False
Save figs : True
Path figs : ./run/figs
%% Cell type:markdown id: tags:
## Step 2 - Start
%% Cell type:code id: tags:
``` python
random.seed(time.time())
# ---- Where am I ?
now = time.strftime("%A %d %B %Y - %Hh%Mm%Ss")
here = os.getcwd()
tag_id = '{:06}'.format(random.randint(0,99999))
# ---- Who am I ?
oar_id = os.getenv("OAR_JOB_ID", "??")
slurm_id = os.getenv("SLURM_JOBID", "??")
print('Full Convolutions Notebook :')
print(' Version : {}'.format(VERSION))
print(' Now is : {}'.format(now))
print(' OAR id : {}'.format(oar_id))
print(' SLURM id : {}'.format(slurm_id))
print(' Tag id : {}'.format(tag_id))
print(' Working directory : {}'.format(here))
print(' Output directory : ./run')
print(' for tensorboard : --logdir {}/run/logs_{}'.format(here,tag_id))
```
%% Output
Full Convolutions Notebook :
Version : 1.6
Now is : Thursday 17 December 2020 - 22h07m09s
OAR id : ??
SLURM id : 1874675
Tag id : 002079
Working directory : /gpfsdswork/projects/rech/mlh/uja62cb/fidle/GTSRB
Output directory : ./run
for tensorboard : --logdir /gpfsdswork/projects/rech/mlh/uja62cb/fidle/GTSRB/run/logs_002079
%% Cell type:code id: tags:
``` python
# ---- Uncomment for batch tests
#
# print("\n\n*** Test mode - Exit before making big treatments... ***\n\n")
# sys.exit()
```
%% Cell type:markdown id: tags:
## Step 3 - Dataset loading
%% Cell type:code id: tags:
``` python
def read_dataset(dataset_dir, name):
    '''Reads h5 dataset from dataset_dir
    Args:
        dataset_dir : datasets dir
        name        : dataset name, without .h5
    Returns: x_train,y_train,x_test,y_test data and file size (MB)'''
    # ---- Read dataset
    filename = f'{dataset_dir}/GTSRB/enhanced/{name}.h5'
    size     = os.path.getsize(filename)/(1024*1024)
    with h5py.File(filename,'r') as f:
        x_train = f['x_train'][:]
        y_train = f['y_train'][:]
        x_test  = f['x_test'][:]
        y_test  = f['y_test'][:]
    # ---- done
    return x_train,y_train,x_test,y_test,size
```
%% Cell type:markdown id: tags:
## Step 4 - Models collection
%% Cell type:code id: tags:
``` python
# A basic model
#
def get_model_v1(lx,ly,lz):
    model = keras.models.Sequential()
    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.2))
    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D((2, 2)))
    model.add( keras.layers.Dropout(0.2))
    model.add( keras.layers.Flatten())
    model.add( keras.layers.Dense(1500, activation='relu'))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Dense(43, activation='softmax'))
    return model
# A more sophisticated model
#
def get_model_v2(lx,ly,lz):
    model = keras.models.Sequential()
    model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))
    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add( keras.layers.Dropout(0.2))
    model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add( keras.layers.Dropout(0.2))
    model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add( keras.layers.Dropout(0.2))
    model.add( keras.layers.Flatten())
    model.add( keras.layers.Dense(512, activation='relu'))
    model.add( keras.layers.Dropout(0.5))
    model.add( keras.layers.Dense(43, activation='softmax'))
    return model
def get_model_v3(lx,ly,lz):
    model = keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)))
    model.add(tf.keras.layers.BatchNormalization(axis=-1))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization(axis=-1))
    model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization(axis=-1))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(512, activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(0.4))
    model.add(tf.keras.layers.Dense(43, activation='softmax'))
    return model
```
%% Cell type:markdown id: tags:
## Step 5 - Multiple datasets, multiple models ;-)
%% Cell type:code id: tags:
``` python
def multi_run(datasets_dir, datasets, models, datagen=None,
              train_size=1, test_size=1, batch_size=64, epochs=16,
              verbose=0, extension_dir='last'):
    """
    Launches a dataset-model combination
    args:
        datasets_dir  : Directory of the datasets
        datasets      : List of datasets (without .h5)
        models        : List of models like { "model name":get_model, ...}
        datagen       : Data generator or None (None)
        train_size    : % of train dataset to use. 1 means all. (1)
        test_size     : % of test dataset to use. 1 means all. (1)
        batch_size    : Batch size (64)
        epochs        : Number of epochs (16)
        verbose       : Verbose level (0)
        extension_dir : postfix for logs and models dir (last)
    return:
        report : Report as a dict for Pandas.
    """

    # ---- Logs and models dir
    #
    os.makedirs(f'./run/logs_{extension_dir}',   mode=0o750, exist_ok=True)
    os.makedirs(f'./run/models_{extension_dir}', mode=0o750, exist_ok=True)

    # ---- Columns of output
    #
    output={}
    output['Dataset'] = []
    output['Size']    = []
    for m in models:
        output[m+'_Accuracy'] = []
        output[m+'_Duration'] = []

    # ---- Let's go
    #
    for d_name in datasets:
        print("\nDataset : ", d_name)

        # ---- Read dataset
        x_train,y_train,x_test,y_test, d_size = read_dataset(datasets_dir, d_name)
        output['Dataset'].append(d_name)
        output['Size'].append(d_size)

        # ---- Get the shape
        (n,lx,ly,lz) = x_train.shape
        n_train = int( x_train.shape[0] * train_size )
        n_test  = int( x_test.shape[0]  * test_size  )

        # ---- For each model
        for m_name,m_function in models.items():
            print(" Run model {} : ".format(m_name), end='')
            # ---- get model
            try:
                model=m_function(lx,ly,lz)
                # ---- Compile it
                model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
                # ---- Callbacks tensorboard
                log_dir = f"./run/logs_{extension_dir}/tb_{d_name}_{m_name}"
                tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
                # ---- Callbacks bestmodel
                save_dir = f"./run/models_{extension_dir}/model_{d_name}_{m_name}.h5"
                bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
                # ---- Train
                start_time = time.time()
                if datagen is None:
                    # ---- No data augmentation (datagen=None) --------------------------------------
                    history = model.fit(x_train[:n_train], y_train[:n_train],
                                        batch_size      = batch_size,
                                        epochs          = epochs,
                                        verbose         = verbose,
                                        validation_data = (x_test[:n_test], y_test[:n_test]),
                                        callbacks       = [tensorboard_callback, bestmodel_callback])
                else:
                    # ---- Data augmentation (datagen given) ----------------------------------------
                    datagen.fit(x_train)
                    history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
                                        steps_per_epoch = int(n_train/batch_size),
                                        epochs          = epochs,
                                        verbose         = verbose,
                                        validation_data = (x_test[:n_test], y_test[:n_test]),
                                        callbacks       = [tensorboard_callback, bestmodel_callback])
                # ---- Result
                end_time = time.time()
                duration = end_time-start_time
                accuracy = max(history.history["val_accuracy"])*100
                #
                output[m_name+'_Accuracy'].append(accuracy)
                output[m_name+'_Duration'].append(duration)
                print(f"Accuracy={accuracy:.2f} and Duration={duration:.2f}")
            except Exception:
                output[m_name+'_Accuracy'].append('0')
                output[m_name+'_Duration'].append('999')
                print('-')

    return output
```
%% Cell type:markdown id: tags:
## Step 6 - Run !
%% Cell type:code id: tags:
``` python
start_time = time.time()
print('\n---- Run','-'*50)
# --------- Datasets, models, and more.. -----------------------------------
#
# ---- For tests
datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-RGB']
models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}
batch_size = 64
epochs = 5
train_size = 0.2
test_size = 0.2
with_datagen = False
verbose = 0
#
# ---- All possibilities
# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']
# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}
# batch_size = 64
# epochs = 16
# train_size = 1
# test_size = 1
# with_datagen = False
# verbose = 0
#
# ---- Data augmentation
# datasets = ['set-48x48-RGB']
# models = {'v2':get_model_v2}
# batch_size = 64
# epochs = 20
# train_size = 1
# test_size = 1
# with_datagen = True
# verbose = 0
#
# ---------------------------------------------------------------------------
# ---- Data augmentation
#
if with_datagen :
    datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
                                                            featurewise_std_normalization=False,
                                                            width_shift_range=0.1,
                                                            height_shift_range=0.1,
                                                            zoom_range=0.2,
                                                            shear_range=0.1,
                                                            rotation_range=10.)
else:
    datagen = None
# ---- Run
#
output = multi_run(datasets_dir,
                   datasets,
                   models,
                   datagen       = datagen,
                   train_size    = train_size,
                   test_size     = test_size,
                   batch_size    = batch_size,
                   epochs        = epochs,
                   verbose       = verbose,
                   extension_dir = tag_id)
# ---- Save report
#
report={}
report['output']=output
report['description'] = f'train_size={train_size} test_size={test_size} batch_size={batch_size} epochs={epochs} data_aug={with_datagen}'
report_name=f'./run/report_{tag_id}.json'
with open(report_name, 'w') as file:
    json.dump(report, file)
print('\nReport saved as ',report_name)
end_time = time.time()
duration = end_time-start_time
print(f'Duration : {duration:.2f} s')
print('-'*59)
```
%% Output
---- Run --------------------------------------------------
Dataset : set-24x24-L
Run model v1 : WARNING:tensorflow:Method (on_train_batch_end) is slow compared to the batch update (0.218721). Check your callbacks.
Accuracy=88.99 and Duration=8.01
Run model v2 : Accuracy=87.77 and Duration=4.63
Run model v3 : Accuracy=88.80 and Duration=5.25
Dataset : set-24x24-RGB
Run model v1 : Accuracy=89.98 and Duration=5.12
Run model v2 : Accuracy=89.35 and Duration=4.56
Run model v3 : Accuracy=86.70 and Duration=5.22
Dataset : set-48x48-RGB
Run model v1 : Accuracy=88.64 and Duration=18.32
Run model v2 : Accuracy=89.71 and Duration=10.17
Run model v3 : Accuracy=92.16 and Duration=11.10
Report saved as ./run/report_002079.json
Duration : 77.23 s
-----------------------------------------------------------
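%% Cell type:markdown id: tags:
As a minimal sketch (the report file name is the one generated by the run above), the saved JSON report can later be reloaded into a Pandas DataFrame:
```python
import json
import pandas as pd

with open('./run/report_002079.json') as f:
    report = json.load(f)

df = pd.DataFrame(report['output'])    # one row per dataset, one column per model metric
print(report['description'])
display(df)
```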
%% Cell type:markdown id: tags:
## Step 7 - That's all folks..
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Thursday 17 December 2020, 22:08:26
Duration is : 00:01:17 312ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [GTS6] - Full convolutions as a batch
<!-- DESC --> Episode 6 : Run Full convolution notebook as a batch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Run a notebook code as a **job**
- Follow up with Tensorboard
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
The final aim is to recognise them!
A description is available here: http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
## What we're going to do :
Our main steps:
- Run Full-convolution.ipynb as a batch :
- Notebook mode
- Script mode
- Tensorboard follow up
%% Cell type:markdown id: tags:
### Step 0 - Just for convenience
%% Cell type:code id: tags:
``` python
import sys
sys.path.append('..')
import fidle.pwk as pwk
datasets_dir = pwk.init('GTS6')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : GTS6
Run time : Friday 18 December 2020, 13:33:50
TensorFlow version : 2.1.0
Keras version : 2.2.4-tf
Datasets dir : /gpfswork/rech/mlh/uja62cb/datasets
Running mode : full
Update keras cache : False
Save figs : True
Path figs : ./run/figs
%% Cell type:markdown id: tags:
## Step 1 - How to run a notebook as a batch ?
Two simple solutions are possible :-)
- **Option 1 - As a notebook ! (a good choice)**
Very simple.
The result is the executed notebook, so we can retrieve all the cell outputs of the notebook :
```jupyter nbconvert (...) --to notebook --execute <notebook>```
Example :
```jupyter nbconvert --ExecutePreprocessor.timeout=-1 --to notebook --execute my_notebook.ipynb```
The result will be a notebook: 'my_notebook.nbconvert.ipynb'.
See: [nbconvert documentation](https://nbconvert.readthedocs.io/en/latest/usage.html#convert-notebook)
- **Option 2 - As a script**
Very simple too, but with some constraints on the notebook.
We will convert the notebook to a Python script (IPython, to be precise) :
```jupyter nbconvert --to script <notebook>```
Then we can execute this script :
```ipython <script>```
See: [nbconvert documentation](https://nbconvert.readthedocs.io/en/latest/usage.html#executable-script)
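For completeness, option 1 can also be driven from Python itself. This is only a minimal sketch (assuming `nbformat` and `nbconvert` are installed; the notebook name is illustrative), equivalent to the nbconvert command shown above :
```python
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

# Read, execute and save the notebook (the name is illustrative)
notebook = nbformat.read('my_notebook.ipynb', as_version=4)
ExecutePreprocessor(timeout=-1).preprocess(notebook, {'metadata': {'path': '.'}})
nbformat.write(notebook, 'my_notebook.nbconvert.ipynb')
```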
%% Cell type:markdown id: tags:
## Step 2 - Run as a script
Maybe not always the best solution, but this one is very simple and robust!
### 2.1 - Convert to IPython script :
%% Cell type:code id: tags:
``` python
%%bash
jupyter nbconvert --to script --output='./run/full_convolutions' '05-Full-convolutions.ipynb'
ls -l ./run/*.py
```
%% Output
-rw-r--r-- 1 uja62cb mlh 12896 Dec 18 13:34 ./run/full_convolutions.py
[NbConvertApp] Converting notebook 05-Full-convolutions.ipynb to script
[NbConvertApp] Writing 12896 bytes to ./run/full_convolutions.py
%% Cell type:markdown id: tags:
### 2.2 - Batch submission
See the two examples of bash launch scripts :
- `batch_slurm.sh` using Slurm (like at IDRIS)
- `batch_oar.sh` using OAR (like at GRICAD)
%% Cell type:markdown id: tags:
#### Example at IDRIS
On the front-end node :
```bash
# hostname
jean-zay2
# sbatch $WORK/fidle/GTSRB/batch_slurm.sh
Submitted batch job 249794
#squeue -u $USER
JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
249794 gpu_p1 GTSRB Fu uja62cb PD 0:00 1 (Resources)
# ls -l _batch/
total 32769
-rw-r--r-- 1 uja62cb gensim01 13349 Sep 10 11:32 GTSRB_249794.err
-rw-r--r-- 1 uja62cb gensim01 489 Sep 10 11:31 GTSRB_249794.out
```
%% Cell type:markdown id: tags:
#### Example at GRICAD
This has to be done on the front-end node :
```bash
# hostname
f-dahu
# pwd
/home/paroutyj
# oarsub -S ~/fidle/GTSRB/batch_oar.sh
[GPUNODE] Adding gpu node restriction
[ADMISSION RULE] Modify resource description with type constraints
#oarstat -u
Job id S User Duration System message
--------- - -------- ---------- ------------------------------------------------
5878410 R paroutyj 0:19:56 R=8,W=1:0:0,J=I,P=fidle,T=gpu (Karma=0.005,quota_ok)
5896266 W paroutyj 0:00:00 R=8,W=1:0:0,J=B,N=Full convolutions,P=fidle,T=gpu
# ls -l
total 8
-rw-r--r-- 1 paroutyj l-simap 0 Feb 28 15:58 batch_oar_5896266.err
-rw-r--r-- 1 paroutyj l-simap 5703 Feb 28 15:58 batch_oar_5896266.out
```
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Friday 18 December 2020, 13:34:12
Duration is : 00:00:22 377ms
This notebook ends here
%% Cell type:markdown id: tags:
----
<div class='todo'>
Your mission if you accept it: Run our full_convolution code in batch mode.<br>
For that :
<ul>
<li>Validate the full_convolution notebook on short tests</li>
<li>Submit it in batch mode for validation</li>
<li>Modify the notebook for a full run and submit it :-)</li>
</ul>
</div>
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [GTS7] - CNN with GTSRB dataset - Show reports
<!-- DESC --> Episode 7 : Displaying a jobs report
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Compare the results of different dataset-model combinations
The reports (json format) are generated by the "Full convolution" jobs [GTS5][GTS6]
## What we're going to do :
- Read json files and display results
## 1/ Python import
%% Cell type:code id: tags:
``` python
import pandas as pd
import sys,os,glob,json
from pathlib import Path
from IPython.display import display, Markdown
sys.path.append('..')
import fidle.pwk as pwk
datasets_dir = pwk.init('GTS7')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : GTS7
Run time : Friday 18 December 2020, 13:52:25
TensorFlow version : 2.1.0
Keras version : 2.2.4-tf
Datasets dir : /gpfswork/rech/mlh/uja62cb/datasets
Running mode : full
Update keras cache : False
Save figs : True
Path figs : ./run/figs
%% Cell type:markdown id: tags:
## 2/ Few nice functions
%% Cell type:code id: tags:
``` python
def highlight_max(s):
is_max = (s == s.max())
return ['background-color: yellow' if v else '' for v in is_max]
def show_report(file):
# ---- Read json file
with open(file) as infile:
dict_report = json.load( infile )
output = dict_report['output']
description = dict_report['description']
# ---- about
pwk.subtitle(f'Report : {Path(file).stem}')
print( "Desc. : ",description,'\n')
# ---- Create a pandas
report = pd.DataFrame (output)
col_accuracy = [ c for c in output.keys() if c.endswith('Accuracy')]
col_duration = [ c for c in output.keys() if c.endswith('Duration')]
# ---- Build formats
lambda_acc = lambda x : '{:.2f} %'.format(x) if (isinstance(x, float)) else '{:}'.format(x)
lambda_dur = lambda x : '{:.1f} s'.format(x) if (isinstance(x, float)) else '{:}'.format(x)
formats = {'Size':'{:.2f} Mo'}
for c in col_accuracy:
formats[c]=lambda_acc
for c in col_duration:
formats[c]=lambda_dur
t=report.style.highlight_max(subset=col_accuracy).format(formats).hide_index()
display(t)
```
%% Cell type:markdown id: tags:
## 3/ Reports display
%% Cell type:code id: tags:
``` python
for file in glob.glob("./run/*.json"):
show_report(file)
```
%% Output
<br>**Report : report_002079**
Desc. : train_size=0.2 test_size=0.2 batch_size=64 epochs=5 data_aug=False
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Friday 18 December 2020, 13:52:25
Duration is : 00:00:01 541ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [IMDB1] - Text embedding with IMDB
<!-- DESC --> A very classical example of word embedding for text classification (sentiment analysis)
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Understand the management of **textual data** and **sentiment analysis**
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://www.tensorflow.org/api_docs/python/tf/keras/datasets)
## What we're going to do :
- Retrieve data
- Preparing the data
- Build a model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.datasets.imdb as imdb
import matplotlib.pyplot as plt
import matplotlib
import os,sys,h5py,json
from importlib import reload
sys.path.append('..')
import fidle.pwk as pwk
datasets_dir = pwk.init('IMDB1')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : IMDB1
Run time : Friday 18 December 2020, 17:52:06
TensorFlow version : 2.0.0
Keras version : 2.2.4-tf
Datasets dir : /home/pjluc/datasets/fidle
Running mode : full
Update keras cache : False
Save figs : True
Path figs : ./run/figs
%% Cell type:markdown id: tags:
## Step 2 - Retrieve data
The IMDb dataset can be retrieved directly from Keras - see [documentation](https://www.tensorflow.org/api_docs/python/tf/keras/datasets)
Note : Due to their nature, textual data can be somewhat complex.
### 2.1 - Data structure :
The dataset is composed of 2 parts:
- **reviews**, this will be our **x**
- **opinions** (positive/negative), this will be our **y**
There is also a **dictionary**, because words are indexed in the reviews
```
<dataset> = (<reviews>, <opinions>)
with : <reviews> = [ <review1>, <review2>, ... ]
<opinions> = [ <rate1>, <rate2>, ... ] where <ratei> = integer
where : <reviewi> = [ <w1>, <w2>, ...] <wi> are the index (int) of the word in the dictionary
<ratei> = int 0 for negative opinion, 1 for positive
<dictionary> = [ <word1>:<w1>, <word2>:<w2>, ... ]
with : <wordi> = word
<wi> = int
```
%% Cell type:markdown id: tags:
### 2.2 - Get dataset
For simplicity, we will use a pre-formatted dataset - See [documentation](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/imdb/load_data)
However, Keras offers some useful tools for formatting textual data - See [documentation](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text)
**Load dataset :**
%% Cell type:code id: tags:
``` python
vocab_size = 10000
# ----- Retrieve x,y
# Load the dataset directly from keras (small size <20M)
#
(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,
skip_top = 0,
maxlen = None,
seed = 42,
start_char = 1,
oov_char = 2,
index_from = 3, )
# To load a h5 version of the dataset :
#
# with h5py.File(f'{datasets_dir}/IMDB/origine/dataset_imdb.h5','r') as f:
# x_train = f['x_train'][:]
# y_train = f['y_train'][:]
# x_test = f['x_test'][:]
# y_test = f['y_test'][:]
```
%% Output
/home/pjluc/anaconda3/envs/fidle/lib/python3.7/site-packages/tensorflow_core/python/keras/datasets/imdb.py:129: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
/home/pjluc/anaconda3/envs/fidle/lib/python3.7/site-packages/tensorflow_core/python/keras/datasets/imdb.py:130: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])
%% Cell type:markdown id: tags:
**About this dataset :**
%% Cell type:code id: tags:
``` python
print(" Max(x_train,x_test) : ", pwk.rmax([x_train,x_test]) )
print(" x_train : {} y_train : {}".format(x_train.shape, y_train.shape))
print(" x_test : {} y_test : {}".format(x_test.shape, y_test.shape))
print('\nReview example (x_train[12]) :\n\n',x_train[12])
```
%% Output
Max(x_train,x_test) : 9999
x_train : (25000,) y_train : (25000,)
x_test : (25000,) y_test : (25000,)
Review example (x_train[12]) :
[1, 14, 22, 1367, 53, 206, 159, 4, 636, 898, 74, 26, 11, 436, 363, 108, 7, 14, 432, 14, 22, 9, 1055, 34, 8599, 2, 5, 381, 3705, 4509, 14, 768, 47, 839, 25, 111, 1517, 2579, 1991, 438, 2663, 587, 4, 280, 725, 6, 58, 11, 2714, 201, 4, 206, 16, 702, 5, 5176, 19, 480, 5920, 157, 13, 64, 219, 4, 2, 11, 107, 665, 1212, 39, 4, 206, 4, 65, 410, 16, 565, 5, 24, 43, 343, 17, 5602, 8, 169, 101, 85, 206, 108, 8, 3008, 14, 25, 215, 168, 18, 6, 2579, 1991, 438, 2, 11, 129, 1609, 36, 26, 66, 290, 3303, 46, 5, 633, 115, 4363]
%% Cell type:markdown id: tags:
### 2.3 - Have a look for humans (optional)
When we loaded the dataset, we asked to use \<start\> as 1 and \<unknown word\> as 2
So, we shifted the dataset by 3 with the parameter index_from=3
**Load dictionary :**
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
#
word_index = imdb.get_word_index()
# ---- Shift the dictionary by +3
#
word_index = {w:(i+3) for w,i in word_index.items()}
# ---- Add <pad>, <start> and unknown tags
#
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2} )
# ---- Create a reverse dictionary : {index:word}
#
index_word = {index:word for word,index in word_index.items()}
# ---- Add a nice function to translate a review back into words :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
**Have a look :**
%% Cell type:code id: tags:
``` python
print('\nDictionary size : ', len(word_index))
for k in range(440,455):print(f'{k:2d} : {index_word[k]}' )
pwk.subtitle('Review example :')
print(x_train[12])
pwk.subtitle('After translation :')
print(dataset2text(x_train[12]))
```
%% Output
Dictionary size : 88587
440 : hope
441 : entertaining
442 : she's
443 : mr
444 : overall
445 : evil
446 : called
447 : loved
448 : based
449 : oh
450 : several
451 : fans
452 : mother
453 : drama
454 : beginning
<br>**Review example :**
[1, 14, 22, 1367, 53, 206, 159, 4, 636, 898, 74, 26, 11, 436, 363, 108, 7, 14, 432, 14, 22, 9, 1055, 34, 8599, 2, 5, 381, 3705, 4509, 14, 768, 47, 839, 25, 111, 1517, 2579, 1991, 438, 2663, 587, 4, 280, 725, 6, 58, 11, 2714, 201, 4, 206, 16, 702, 5, 5176, 19, 480, 5920, 157, 13, 64, 219, 4, 2, 11, 107, 665, 1212, 39, 4, 206, 4, 65, 410, 16, 565, 5, 24, 43, 343, 17, 5602, 8, 169, 101, 85, 206, 108, 8, 3008, 14, 25, 215, 168, 18, 6, 2579, 1991, 438, 2, 11, 129, 1609, 36, 26, 66, 290, 3303, 46, 5, 633, 115, 4363]
<br>**After translation :**
<start> this film contains more action before the opening credits than are in entire hollywood films of this sort this film is produced by tsui <unknown> and stars jet li this team has brought you many worthy hong kong cinema productions including the once upon a time in china series the action was fast and furious with amazing wire work i only saw the <unknown> in two shots aside from the action the story itself was strong and not just used as filler to find any other action films to rival this you must look for a hong kong cinema <unknown> in your area they are really worth checking out and usually never disappoint
%% Cell type:markdown id: tags:
### 2.4 - Have a look for NN
%% Cell type:code id: tags:
``` python
sizes=[len(i) for i in x_train]
plt.figure(figsize=(16,6))
plt.hist(sizes, bins=400)
plt.gca().set(title='Distribution of reviews by size - [{:5.2f}, {:5.2f}]'.format(min(sizes),max(sizes)),
xlabel='Size', ylabel='Density', xlim=[0,1500])
pwk.save_fig('01-stats-sizes')
plt.show()
```
%% Output
%% Cell type:markdown id: tags:
## Step 3 - Preprocess the data (padding)
In order to be processed by an NN, all entries must have the **same length.**
We chose a review length of **review_len**
We will therefore complete them with a padding (of \<pad\>)
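Just to make the padding concrete, here is a minimal sketch on toy values (the numbers below are arbitrary word indices, not taken from the dataset) :
```python
import tensorflow.keras as keras

# Two "reviews" of different lengths, padded after the text ('post'), up to length 4
toy = [[11, 12, 13], [21, 22]]
print( keras.preprocessing.sequence.pad_sequences(toy, value=0, padding='post', maxlen=4) )
# -> [[11 12 13  0]
#     [21 22  0  0]]
```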
%% Cell type:code id: tags:
``` python
review_len = 256
x_train = keras.preprocessing.sequence.pad_sequences(x_train,
value = 0,
padding = 'post',
maxlen = review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test,
value = 0 ,
padding = 'post',
maxlen = review_len)
pwk.subtitle('After padding :')
print(x_train[12])
pwk.subtitle('In real words :')
print(dataset2text(x_train[12]))
```
%% Output
<br>**After padding :**
[ 1 14 22 1367 53 206 159 4 636 898 74 26 11 436
363 108 7 14 432 14 22 9 1055 34 8599 2 5 381
3705 4509 14 768 47 839 25 111 1517 2579 1991 438 2663 587
4 280 725 6 58 11 2714 201 4 206 16 702 5 5176
19 480 5920 157 13 64 219 4 2 11 107 665 1212 39
4 206 4 65 410 16 565 5 24 43 343 17 5602 8
169 101 85 206 108 8 3008 14 25 215 168 18 6 2579
1991 438 2 11 129 1609 36 26 66 290 3303 46 5 633
115 4363 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0]
<br>**In real words :**
<start> this film contains more action before the opening credits than are in entire hollywood films of this sort this film is produced by tsui <unknown> and stars jet li this team has brought you many worthy hong kong cinema productions including the once upon a time in china series the action was fast and furious with amazing wire work i only saw the <unknown> in two shots aside from the action the story itself was strong and not just used as filler to find any other action films to rival this you must look for a hong kong cinema <unknown> in your area they are really worth checking out and usually never disappoint <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
%% Cell type:markdown id: tags:
**Save dataset and dictionary (For future use but not mandatory)**
%% Cell type:code id: tags:
``` python
# ---- Write the dataset in an h5 file, could be useful
#
output_dir = './data'
pwk.mkdir(output_dir)
with h5py.File(f'{output_dir}/dataset_imdb.h5', 'w') as f:
f.create_dataset("x_train", data=x_train)
f.create_dataset("y_train", data=y_train)
f.create_dataset("x_test", data=x_test)
f.create_dataset("y_test", data=y_test)
with open(f'{output_dir}/word_index.json', 'w') as fp:
json.dump(word_index, fp)
with open(f'{output_dir}/index_word.json', 'w') as fp:
json.dump(index_word, fp)
print('Saved.')
```
%% Output
Saved.
%% Cell type:markdown id: tags:
## Step 4 - Build the model
Few remarks :
- We'll choose a dense vector size for the embedding output with **dense_vector_size**
- **GlobalAveragePooling1D** performs an average pooling over the sequence dimension : (None, lx, ly) -> (None, ly)
In other words: we average the set of vectors/words of a sentence
- The Keras embedding layer works in a supervised way. It is a layer going from *vocab_size* neurons to *n_neurons*, maintaining a table of vectors (the weights are the vectors). This layer does not compute an output like ordinary layers do, it returns the value of the vectors : n words => n vectors (then stacked by the pooling)
See : [A more detailed explanation (en)](https://stats.stackexchange.com/questions/324992/how-the-embedding-layer-is-trained-in-keras-embedding-layer)
as well as : [Sentiment detection with Keras](https://www.liip.ch/en/blog/sentiment-detection-with-keras-word-embeddings-and-lstm-deep-learning-networks)
More documentation about these layers :
- [Embedding](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Embedding)
- [GlobalAveragePooling1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GlobalAveragePooling1D)
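As a small illustration of the pooling, here is a minimal sketch with arbitrary values (not taken from the dataset) : GlobalAveragePooling1D simply averages the word vectors of each sentence.
```python
import numpy as np
import tensorflow.keras as keras

# One "sentence" of 3 words, each already embedded as a vector of size 2 : shape (1, 3, 2)
sentence = np.array([[[1., 2.],
                      [3., 4.],
                      [5., 6.]]])

pooling = keras.layers.GlobalAveragePooling1D()
print( pooling(sentence).numpy() )   # -> [[3. 4.]] : the mean of the 3 word vectors
```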
%% Cell type:code id: tags:
``` python
def get_model(dense_vector_size=32):
model = keras.Sequential()
model.add(keras.layers.Embedding(input_dim = vocab_size,
output_dim = dense_vector_size,
input_length = review_len))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(dense_vector_size, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer = 'adam',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
return model
```
%% Cell type:markdown id: tags:
## Step 5 - Train the model
### 5.1 - Get it
%% Cell type:code id: tags:
``` python
model = get_model(32)
model.summary()
```
%% Output
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 256, 32) 320000
_________________________________________________________________
global_average_pooling1d (Gl (None, 32) 0
_________________________________________________________________
dense (Dense) (None, 32) 1056
_________________________________________________________________
dense_1 (Dense) (None, 1) 33
=================================================================
Total params: 321,089
Trainable params: 321,089
Non-trainable params: 0
_________________________________________________________________
%% Cell type:markdown id: tags:
### 5.2 - Add callback
%% Cell type:code id: tags:
``` python
os.makedirs('./run/models', mode=0o750, exist_ok=True)
save_dir = "./run/models/best_model.h5"
savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)
```
%% Cell type:markdown id: tags:
### 5.3 - Train it
%% Cell type:code id: tags:
``` python
%%time
n_epochs = 30
batch_size = 512
history = model.fit(x_train,
y_train,
epochs = n_epochs,
batch_size = batch_size,
validation_data = (x_test, y_test),
verbose = 1,
callbacks = [savemodel_callback])
```
%% Output
Train on 25000 samples, validate on 25000 samples
Epoch 1/30
25000/25000 [==============================] - 1s 57us/sample - loss: 0.6881 - accuracy: 0.6431 - val_loss: 0.6782 - val_accuracy: 0.7408
Epoch 2/30
25000/25000 [==============================] - 1s 32us/sample - loss: 0.6506 - accuracy: 0.7618 - val_loss: 0.6168 - val_accuracy: 0.7650
Epoch 3/30
25000/25000 [==============================] - 1s 31us/sample - loss: 0.5590 - accuracy: 0.8060 - val_loss: 0.5135 - val_accuracy: 0.8203
Epoch 4/30
25000/25000 [==============================] - 1s 31us/sample - loss: 0.4460 - accuracy: 0.8512 - val_loss: 0.4198 - val_accuracy: 0.8487
Epoch 5/30
25000/25000 [==============================] - 1s 31us/sample - loss: 0.3606 - accuracy: 0.8758 - val_loss: 0.3636 - val_accuracy: 0.8609
Epoch 6/30
25000/25000 [==============================] - 1s 31us/sample - loss: 0.3074 - accuracy: 0.8894 - val_loss: 0.3322 - val_accuracy: 0.8676
Epoch 7/30
25000/25000 [==============================] - 1s 31us/sample - loss: 0.2719 - accuracy: 0.9016 - val_loss: 0.3127 - val_accuracy: 0.8734
Epoch 8/30
25000/25000 [==============================] - 1s 31us/sample - loss: 0.2457 - accuracy: 0.9103 - val_loss: 0.3007 - val_accuracy: 0.8765
Epoch 9/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.2257 - accuracy: 0.9178 - val_loss: 0.2933 - val_accuracy: 0.8795
Epoch 10/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.2083 - accuracy: 0.9249 - val_loss: 0.2888 - val_accuracy: 0.8818
Epoch 11/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.1938 - accuracy: 0.9310 - val_loss: 0.2874 - val_accuracy: 0.8824
Epoch 12/30
25000/25000 [==============================] - 1s 31us/sample - loss: 0.1818 - accuracy: 0.9352 - val_loss: 0.2867 - val_accuracy: 0.8826
Epoch 13/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.1703 - accuracy: 0.9406 - val_loss: 0.2879 - val_accuracy: 0.8828
Epoch 14/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.1605 - accuracy: 0.9451 - val_loss: 0.2922 - val_accuracy: 0.8815
Epoch 15/30
25000/25000 [==============================] - 1s 29us/sample - loss: 0.1520 - accuracy: 0.9480 - val_loss: 0.2945 - val_accuracy: 0.8828
Epoch 16/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.1435 - accuracy: 0.9524 - val_loss: 0.2986 - val_accuracy: 0.8821
Epoch 17/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.1359 - accuracy: 0.9551 - val_loss: 0.3042 - val_accuracy: 0.8803
Epoch 18/30
25000/25000 [==============================] - 1s 29us/sample - loss: 0.1290 - accuracy: 0.9581 - val_loss: 0.3100 - val_accuracy: 0.8783
Epoch 19/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.1223 - accuracy: 0.9609 - val_loss: 0.3169 - val_accuracy: 0.8772
Epoch 20/30
25000/25000 [==============================] - 1s 29us/sample - loss: 0.1167 - accuracy: 0.9632 - val_loss: 0.3245 - val_accuracy: 0.8747
Epoch 21/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.1116 - accuracy: 0.9654 - val_loss: 0.3318 - val_accuracy: 0.8756
Epoch 22/30
25000/25000 [==============================] - 1s 29us/sample - loss: 0.1064 - accuracy: 0.9670 - val_loss: 0.3409 - val_accuracy: 0.8743
Epoch 23/30
25000/25000 [==============================] - 1s 29us/sample - loss: 0.1007 - accuracy: 0.9704 - val_loss: 0.3478 - val_accuracy: 0.8728
Epoch 24/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.0963 - accuracy: 0.9718 - val_loss: 0.3577 - val_accuracy: 0.8718
Epoch 25/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.0914 - accuracy: 0.9743 - val_loss: 0.3728 - val_accuracy: 0.8670
Epoch 26/30
25000/25000 [==============================] - 1s 29us/sample - loss: 0.0887 - accuracy: 0.9744 - val_loss: 0.3746 - val_accuracy: 0.8697
Epoch 27/30
25000/25000 [==============================] - 1s 29us/sample - loss: 0.0842 - accuracy: 0.9777 - val_loss: 0.3835 - val_accuracy: 0.8683
Epoch 28/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.0796 - accuracy: 0.9788 - val_loss: 0.3937 - val_accuracy: 0.8670
Epoch 29/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.0763 - accuracy: 0.9804 - val_loss: 0.4032 - val_accuracy: 0.8658
Epoch 30/30
25000/25000 [==============================] - 1s 30us/sample - loss: 0.0729 - accuracy: 0.9819 - val_loss: 0.4156 - val_accuracy: 0.8648
CPU times: user 1min 42s, sys: 7.41 s, total: 1min 50s
Wall time: 23.4 s
%% Cell type:markdown id: tags:
## Step 6 - Evaluate
### 6.1 - Training history
%% Cell type:code id: tags:
``` python
pwk.plot_history(history, save_as='02-history')
```
%% Output
%% Cell type:markdown id: tags:
### 6.2 - Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model('./run/models/best_model.h5')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('x_test / loss : {:5.4f}'.format(score[0]))
print('x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
pwk.plot_donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
pwk.display_confusion_matrix(y_test,y_pred,labels=range(2))
pwk.plot_confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Output
x_test / loss : 0.2867
x_test / accuracy : 0.8826
#### Accuracy donut is :
#### Confusion matrix is :
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Friday 18 December 2020, 17:52:43
Duration is : 00:00:37 585ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [IMDB2] - Text embedding with IMDB - Reloaded
<!-- DESC --> Example of reusing a previously saved model
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- For this, we will use our **previously saved model**.
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://www.tensorflow.org/api_docs/python/tf/keras/datasets)
## What we're going to do :
- Preparing the data
- Retrieve our saved model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.datasets.imdb as imdb
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd
import os,sys,h5py,json,re
from importlib import reload
sys.path.append('..')
import fidle.pwk as pwk
datasets_dir = pwk.init('IMDB2')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : IMDB2
Run time : Friday 18 December 2020, 18:21:49
TensorFlow version : 2.0.0
Keras version : 2.2.4-tf
Datasets dir : /home/pjluc/datasets/fidle
Running mode : full
Update keras cache : False
Save figs : True
Path figs : ./run/figs
%% Cell type:markdown id: tags:
## Step 2 : Preparing the data
### 2.1 - Our reviews :
%% Cell type:code id: tags:
``` python
reviews = [ "This film is particularly nice, a must see.",
"Some films are great classics and cannot be ignored.",
"This movie is just abominable and doesn't deserve to be seen!"]
```
%% Cell type:markdown id: tags:
### 2.2 - Retrieve dictionaries
Note : This dictionary is generated by the [01-Embedding-Keras](01-Embedding-Keras.ipynb) notebook.
%% Cell type:code id: tags:
``` python
with open('./data/word_index.json', 'r') as fp:
word_index = json.load(fp)
index_word = {index:word for word,index in word_index.items()}
```
%% Cell type:markdown id: tags:
### 2.3 - Clean, index and pad
%% Cell type:code id: tags:
``` python
max_len = 256
vocab_size = 10000
nb_reviews = len(reviews)
x_data = []
# ---- For all reviews
for review in reviews:
# ---- First index must be <start>
index_review=[1]
# ---- For all words
for w in review.split(' '):
# ---- Clean it
w_clean = re.sub(r"[^a-zA-Z0-9]", "", w)
# ---- Not empty ?
if len(w_clean)>0:
# ---- Get the index
            w_index = word_index.get(w_clean,2)
if w_index>vocab_size : w_index=2
# ---- Add the index if < vocab_size
index_review.append(w_index)
# ---- Add the indexed review
x_data.append(index_review)
# ---- Padding
x_data = keras.preprocessing.sequence.pad_sequences(x_data, value = 0, padding = 'post', maxlen = max_len)
```
%% Cell type:markdown id: tags:
### 2.4 - Have a look
%% Cell type:code id: tags:
``` python
def translate(x):
return ' '.join( [index_word.get(i,'?') for i in x] )
for i in range(nb_reviews):
imax=np.where(x_data[i]==0)[0][0]+5
print(f'\nText review :', reviews[i])
print( f'x_train[{i:}] :', list(x_data[i][:imax]), '(...)')
print( 'Translation :', translate(x_data[i][:imax]), '(...)')
```
%% Output
Text review : This film is particularly nice, a must see.
x_train[0] : [1, 2, 22, 9, 572, 2, 6, 215, 2, 0, 0, 0, 0, 0] (...)
Translation : <start> <unknown> film is particularly <unknown> a must <unknown> <pad> <pad> <pad> <pad> <pad> (...)
Text review : Some films are great classics and cannot be ignored.
x_train[1] : [1, 2, 108, 26, 87, 2239, 5, 566, 30, 2, 0, 0, 0, 0, 0] (...)
Translation : <start> <unknown> films are great classics and cannot be <unknown> <pad> <pad> <pad> <pad> <pad> (...)
Text review : This movie is just abominable and doesn't deserve to be seen!
x_train[2] : [1, 2, 20, 9, 43, 2, 5, 152, 1833, 8, 30, 2, 0, 0, 0, 0, 0] (...)
Translation : <start> <unknown> movie is just <unknown> and doesn't deserve to be <unknown> <pad> <pad> <pad> <pad> <pad> (...)
%% Cell type:markdown id: tags:
## Step 3 - Bring back the model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model('./run/models/best_model.h5')
```
%% Cell type:markdown id: tags:
## Step 4 - Predict
%% Cell type:code id: tags:
``` python
y_pred = model.predict(x_data)
```
%% Cell type:markdown id: tags:
#### And the winner is :
%% Cell type:code id: tags:
``` python
for i in range(nb_reviews):
print(f'\n{reviews[i]:<70} =>',('NEGATIVE' if y_pred[i][0]<0.5 else 'POSITIVE'),f'({y_pred[i][0]:.2f})')
```
%% Output
This film is particularly nice, a must see. => POSITIVE (0.56)
Some films are great classics and cannot be ignored. => POSITIVE (0.63)
This movie is just abominable and doesn't deserve to be seen! => NEGATIVE (0.35)
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Friday 18 December 2020, 18:21:50
Duration is : 00:00:01 555ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [IMDB3] - Text embedding/LSTM model with IMDB
<!-- DESC --> Still the same problem, but with a network combining embedding and LSTM
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Use of a model combining embedding and LSTM
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://www.tensorflow.org/api_docs/python/tf/keras/datasets)
## What we're going to do :
- Retrieve data
- Preparing the data
- Build an Embedding/LSTM model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.datasets.imdb as imdb
import matplotlib.pyplot as plt
import matplotlib
import os,sys,h5py,json
from importlib import reload
sys.path.append('..')
import fidle.pwk as pwk
datasets_dir = pwk.init('IMDB3')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : IMDB3
Run time : Friday 18 December 2020, 19:52:57
TensorFlow version : 2.0.0
Keras version : 2.2.4-tf
Datasets dir : /home/pjluc/datasets/fidle
Running mode : full
Update keras cache : False
Save figs : True
Path figs : ./run/figs
%% Cell type:markdown id: tags:
## Step 2 - Retrieve data
The IMDb dataset can be retrieved directly from Keras - see [documentation](https://www.tensorflow.org/api_docs/python/tf/keras/datasets)
Note : Due to their nature, textual data can be somewhat complex.
### 2.1 - Data structure :
The dataset is composed of 2 parts:
- **reviews**, this will be our **x**
- **opinions** (positive/negative), this will be our **y**
There is also a **dictionary**, because words are indexed in the reviews
```
<dataset> = (<reviews>, <opinions>)
with : <reviews> = [ <review1>, <review2>, ... ]
<opinions> = [ <rate1>, <rate2>, ... ] where <ratei> = integer
where : <reviewi> = [ <w1>, <w2>, ...] <wi> are the index (int) of the word in the dictionary
<ratei> = int 0 for negative opinion, 1 for positive
<dictionary> = [ <word1>:<w1>, <word2>:<w2>, ... ]
with : <wordi> = word
<wi> = int
```
%% Cell type:markdown id: tags:
### 2.2 - Get dataset
For simplicity, we will use a pre-formatted dataset - See [documentation](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/imdb/load_data)
However, Keras offers some useful tools for formatting textual data - See [documentation](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text)
**Load dataset :**
%% Cell type:code id: tags:
``` python
vocab_size = 10000
# ----- Retrieve x,y
# Load the dataset directly from keras (small size <20M)
#
(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,
skip_top = 0,
maxlen = None,
seed = 42,
start_char = 1,
oov_char = 2,
index_from = 3, )
# To load a h5 version of the dataset :
#
# with h5py.File(f'{datasets_dir}/IMDB/origine/dataset_imdb.h5','r') as f:
# x_train = f['x_train'][:]
# y_train = f['y_train'][:]
# x_test = f['x_test'][:]
# y_test = f['y_test'][:]
```
%% Output
/home/pjluc/anaconda3/envs/fidle/lib/python3.7/site-packages/tensorflow_core/python/keras/datasets/imdb.py:129: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
/home/pjluc/anaconda3/envs/fidle/lib/python3.7/site-packages/tensorflow_core/python/keras/datasets/imdb.py:130: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])
%% Cell type:markdown id: tags:
**About this dataset :**
%% Cell type:code id: tags:
``` python
print(" Max(x_train,x_test) : ", pwk.rmax([x_train,x_test]) )
print(" x_train : {} y_train : {}".format(x_train.shape, y_train.shape))
print(" x_test : {} y_test : {}".format(x_test.shape, y_test.shape))
print('\nReview example (x_train[12]) :\n\n',x_train[12])
```
%% Output
Max(x_train,x_test) : 9999
x_train : (25000,) y_train : (25000,)
x_test : (25000,) y_test : (25000,)
Review example (x_train[12]) :
[1, 14, 22, 1367, 53, 206, 159, 4, 636, 898, 74, 26, 11, 436, 363, 108, 7, 14, 432, 14, 22, 9, 1055, 34, 8599, 2, 5, 381, 3705, 4509, 14, 768, 47, 839, 25, 111, 1517, 2579, 1991, 438, 2663, 587, 4, 280, 725, 6, 58, 11, 2714, 201, 4, 206, 16, 702, 5, 5176, 19, 480, 5920, 157, 13, 64, 219, 4, 2, 11, 107, 665, 1212, 39, 4, 206, 4, 65, 410, 16, 565, 5, 24, 43, 343, 17, 5602, 8, 169, 101, 85, 206, 108, 8, 3008, 14, 25, 215, 168, 18, 6, 2579, 1991, 438, 2, 11, 129, 1609, 36, 26, 66, 290, 3303, 46, 5, 633, 115, 4363]
%% Cell type:markdown id: tags:
### 2.3 - Have a look for humans (optional)
When we loaded the dataset, we asked to use \<start\> as 1 and \<unknown word\> as 2
So, we shifted the dataset by 3 with the parameter index_from=3
**Load dictionary :**
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
#
word_index = imdb.get_word_index()
# ---- Shift the dictionary by +3
#
word_index = {w:(i+3) for w,i in word_index.items()}
# ---- Add <pad>, <start> and unknown tags
#
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2} )
# ---- Create a reverse dictionary : {index:word}
#
index_word = {index:word for word,index in word_index.items()}
# ---- Add a nice function to translate a review back into words :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
**Have a look :**
%% Cell type:code id: tags:
``` python
print('\nDictionary size : ', len(word_index))
for k in range(440,455):print(f'{k:2d} : {index_word[k]}' )
pwk.subtitle('Review example :')
print(x_train[12])
pwk.subtitle('After translation :')
print(dataset2text(x_train[12]))
```
%% Output
Dictionary size : 88587
440 : hope
441 : entertaining
442 : she's
443 : mr
444 : overall
445 : evil
446 : called
447 : loved
448 : based
449 : oh
450 : several
451 : fans
452 : mother
453 : drama
454 : beginning
<br>**Review example :**
[1, 14, 22, 1367, 53, 206, 159, 4, 636, 898, 74, 26, 11, 436, 363, 108, 7, 14, 432, 14, 22, 9, 1055, 34, 8599, 2, 5, 381, 3705, 4509, 14, 768, 47, 839, 25, 111, 1517, 2579, 1991, 438, 2663, 587, 4, 280, 725, 6, 58, 11, 2714, 201, 4, 206, 16, 702, 5, 5176, 19, 480, 5920, 157, 13, 64, 219, 4, 2, 11, 107, 665, 1212, 39, 4, 206, 4, 65, 410, 16, 565, 5, 24, 43, 343, 17, 5602, 8, 169, 101, 85, 206, 108, 8, 3008, 14, 25, 215, 168, 18, 6, 2579, 1991, 438, 2, 11, 129, 1609, 36, 26, 66, 290, 3303, 46, 5, 633, 115, 4363]
<br>**After translation :**
<start> this film contains more action before the opening credits than are in entire hollywood films of this sort this film is produced by tsui <unknown> and stars jet li this team has brought you many worthy hong kong cinema productions including the once upon a time in china series the action was fast and furious with amazing wire work i only saw the <unknown> in two shots aside from the action the story itself was strong and not just used as filler to find any other action films to rival this you must look for a hong kong cinema <unknown> in your area they are really worth checking out and usually never disappoint
%% Cell type:markdown id: tags:
### 2.4 - Have a look for NN
%% Cell type:code id: tags:
``` python
sizes=[len(i) for i in x_train]
plt.figure(figsize=(16,6))
plt.hist(sizes, bins=400)
plt.gca().set(title='Distribution of reviews by size - [{:5.2f}, {:5.2f}]'.format(min(sizes),max(sizes)),
xlabel='Size', ylabel='Density', xlim=[0,1500])
pwk.save_fig('01-stats-sizes')
plt.show()
```
%% Output
%% Cell type:markdown id: tags:
## Step 3 - Preprocess the data (padding)
In order to be processed by an NN, all entries must have the **same length.**
We chose a review length of **review_len**
We will therefore complete them with a padding (of \<pad\>)
%% Cell type:code id: tags:
``` python
review_len = 256
x_train = keras.preprocessing.sequence.pad_sequences(x_train,
value = 0,
padding = 'post',
maxlen = review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test,
value = 0 ,
padding = 'post',
maxlen = review_len)
pwk.subtitle('After padding :')
print(x_train[12])
pwk.subtitle('In real words :')
print(dataset2text(x_train[12]))
```
%% Output
<br>**After padding :**
[ 1 14 22 1367 53 206 159 4 636 898 74 26 11 436
363 108 7 14 432 14 22 9 1055 34 8599 2 5 381
3705 4509 14 768 47 839 25 111 1517 2579 1991 438 2663 587
4 280 725 6 58 11 2714 201 4 206 16 702 5 5176
19 480 5920 157 13 64 219 4 2 11 107 665 1212 39
4 206 4 65 410 16 565 5 24 43 343 17 5602 8
169 101 85 206 108 8 3008 14 25 215 168 18 6 2579
1991 438 2 11 129 1609 36 26 66 290 3303 46 5 633
115 4363 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0]
<br>**In real words :**
<start> this film contains more action before the opening credits than are in entire hollywood films of this sort this film is produced by tsui <unknown> and stars jet li this team has brought you many worthy hong kong cinema productions including the once upon a time in china series the action was fast and furious with amazing wire work i only saw the <unknown> in two shots aside from the action the story itself was strong and not just used as filler to find any other action films to rival this you must look for a hong kong cinema <unknown> in your area they are really worth checking out and usually never disappoint <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
%% Cell type:markdown id: tags:
**Save dataset and dictionary (For future use but not mandatory)**
%% Cell type:code id: tags:
``` python
# ---- Write the dataset in an h5 file, could be useful
#
output_dir = './data'
pwk.mkdir(output_dir)
with h5py.File(f'{output_dir}/dataset_imdb.h5', 'w') as f:
f.create_dataset("x_train", data=x_train)
f.create_dataset("y_train", data=y_train)
f.create_dataset("x_test", data=x_test)
f.create_dataset("y_test", data=y_test)
with open(f'{output_dir}/word_index.json', 'w') as fp:
json.dump(word_index, fp)
with open(f'{output_dir}/index_word.json', 'w') as fp:
json.dump(index_word, fp)
print('Saved.')
```
%% Output
Saved.
%% Cell type:markdown id: tags:
## Step 4 - Build the model
Few remarks :
- We'll choose a dense vector size for the embedding output with **dense_vector_size**
- **GlobalAveragePooling1D** performs an average pooling over the sequence dimension : (None, lx, ly) -> (None, ly)
In other words: we average the set of vectors/words of a sentence
- The Keras embedding layer works in a supervised way. It is a layer going from *vocab_size* neurons to *n_neurons*, maintaining a table of vectors (the weights are the vectors). This layer does not compute an output like ordinary layers do, it returns the value of the vectors : n words => n vectors (then stacked by the pooling)
See : [A more detailed explanation (en)](https://stats.stackexchange.com/questions/324992/how-the-embedding-layer-is-trained-in-keras-embedding-layer)
as well as : [Sentiment detection with Keras](https://www.liip.ch/en/blog/sentiment-detection-with-keras-word-embeddings-and-lstm-deep-learning-networks)
More documentation about these layers :
- [Embedding](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Embedding)
- [GlobalAveragePooling1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GlobalAveragePooling1D)
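To see how the two layers fit together in this notebook, here is a minimal sketch with toy sizes (unrelated to the real model below) : the Embedding turns each word index into a vector, and the LSTM reduces the resulting sequence to a single state vector per review.
```python
import numpy as np
import tensorflow.keras as keras

# Toy sizes : vocabulary of 10 words, vectors of size 4, reviews of 6 words
demo = keras.Sequential([
    keras.layers.Embedding(input_dim=10, output_dim=4, input_length=6),
    keras.layers.LSTM(8)
])
x = np.random.randint(0, 10, size=(2, 6))   # 2 "reviews" of 6 word indices
print( demo(x).shape )                      # -> (2, 8) : one state vector per review
```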
%% Cell type:code id: tags:
``` python
def get_model(dense_vector_size=128):
model = keras.Sequential()
model.add(keras.layers.Embedding(input_dim = vocab_size,
output_dim = dense_vector_size,
input_length = review_len))
model.add(keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer = 'adam',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
return model
```
%% Cell type:markdown id: tags:
## Step 5 - Train the model
### 5.1 - Get it
%% Cell type:code id: tags:
``` python
model = get_model(32)
model.summary()
```
%% Output
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 256, 32) 320000
_________________________________________________________________
lstm (LSTM) (None, 128) 82432
_________________________________________________________________
dense (Dense) (None, 1) 129
=================================================================
Total params: 402,561
Trainable params: 402,561
Non-trainable params: 0
_________________________________________________________________
%% Cell type:markdown id: tags:
### 5.2 - Add callback
%% Cell type:code id: tags:
``` python
os.makedirs('./run/models', mode=0o750, exist_ok=True)
save_dir = "./run/models/best_model.h5"
savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)
```
%% Cell type:markdown id: tags:
### 5.3 - Train it
GPU : batch_size=512 : 6' 30s
CPU : batch_size=512 : 12' 57s
%% Cell type:code id: tags:
``` python
%%time
n_epochs = 10
batch_size = 512
history = model.fit(x_train,
y_train,
epochs = n_epochs,
batch_size = batch_size,
validation_data = (x_test, y_test),
verbose = 1,
callbacks = [savemodel_callback])
```
%% Output
Train on 25000 samples, validate on 25000 samples
Epoch 1/10
25000/25000 [==============================] - 74s 3ms/sample - loss: 0.6924 - accuracy: 0.5083 - val_loss: 0.6919 - val_accuracy: 0.5006
Epoch 2/10
25000/25000 [==============================] - 80s 3ms/sample - loss: 0.6668 - accuracy: 0.5982 - val_loss: 0.6759 - val_accuracy: 0.5417
Epoch 3/10
25000/25000 [==============================] - 81s 3ms/sample - loss: 0.6737 - accuracy: 0.5459 - val_loss: 0.6750 - val_accuracy: 0.5363
Epoch 4/10
25000/25000 [==============================] - 80s 3ms/sample - loss: 0.6569 - accuracy: 0.5696 - val_loss: 0.6373 - val_accuracy: 0.5694
Epoch 5/10
25000/25000 [==============================] - 80s 3ms/sample - loss: 0.6409 - accuracy: 0.6388 - val_loss: 0.6431 - val_accuracy: 0.6351
Epoch 6/10
25000/25000 [==============================] - 79s 3ms/sample - loss: 0.6149 - accuracy: 0.6703 - val_loss: 0.6466 - val_accuracy: 0.6623
Epoch 7/10
25000/25000 [==============================] - 81s 3ms/sample - loss: 0.5660 - accuracy: 0.7337 - val_loss: 0.5943 - val_accuracy: 0.7200
Epoch 8/10
25000/25000 [==============================] - 79s 3ms/sample - loss: 0.5467 - accuracy: 0.7518 - val_loss: 0.5087 - val_accuracy: 0.7856
Epoch 9/10
25000/25000 [==============================] - 79s 3ms/sample - loss: 0.5587 - accuracy: 0.7446 - val_loss: 0.6065 - val_accuracy: 0.6842
Epoch 10/10
25000/25000 [==============================] - 80s 3ms/sample - loss: 0.5798 - accuracy: 0.7039 - val_loss: 0.5688 - val_accuracy: 0.7133
CPU times: user 58min 6s, sys: 9min 21s, total: 1h 7min 27s
Wall time: 13min 12s
%% Cell type:markdown id: tags:
## Step 6 - Evaluate
### 6.1 - Training history
%% Cell type:code id: tags:
``` python
pwk.plot_history(history, save_as='02-history')
```
%% Output
%% Cell type:markdown id: tags:
### 6.2 - Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model('./run/models/best_model.h5')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('x_test / loss : {:5.4f}'.format(score[0]))
print('x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
pwk.plot_donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
pwk.display_confusion_matrix(y_test,y_pred,labels=range(2))
pwk.plot_confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Output
x_test / loss : 0.5087
x_test / accuracy : 0.7856
#### Accuracy donut is :
#### Confusion matrix is :
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Friday 18 December 2020, 19:53:06
Duration is : 00:00:09 281ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [PER57] - Perceptron Model 1957
<!-- DESC --> A simple perceptron, with the IRIS dataset.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Implement a historical linear classifier with a historical dataset!
- The objective is to predict the type of Iris from the size of its petals.
- Identify its limitations
The [IRIS dataset](https://archive.ics.uci.edu/ml/datasets/Iris) is probably one of the oldest datasets, dating back to 1936.
## What we're going to do :
- Retrieve the dataset, via scikit-learn
- Training and classifying
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import os,sys
sys.path.append('..')
import fidle.pwk as pwk
datasets_dir = pwk.init('PER57')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : PER57
Run time : Wednesday 16 December 2020, 21:01:59
TensorFlow version : 2.0.0
Keras version : 2.2.4-tf
Datasets dir : ~/datasets/fidle
Update keras cache : False
Save figs : True
Path figs : ./run/figs
%% Cell type:markdown id: tags:
## Step 2 - Prepare IRIS Dataset
Retrieve a dataset : http://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets
About the datasets : http://scikit-learn.org/stable/datasets/index.html
Data fields (X) :
- 0 : sepal length in cm
- 1 : sepal width in cm
- 2 : petal length in cm
- 3 : petal width in cm
Class (y) :
- 0=Iris-Setosa, 1=Iris-Versicolour, 2=Iris-Virginica
### 2.1 - Get dataset
%% Cell type:code id: tags:
``` python
x0,y0 = load_iris(return_X_y=True)
x = x0[:, (2,3)] # We only keep fields 2 and 3
y = y0.copy()
y[ y0==0 ] = 1 # 1 = Iris setosa
y[ y0>=1 ] = 0 # 0 = not iris setosa
df=pd.DataFrame.from_dict({'Length (x1)':x[:,0], 'Width (x2)':x[:,1], 'Setosa {0,1} (y)':y})
display(df)
print(f'x shape : {x.shape}')
print(f'y shape : {y.shape}')
```
%% Output
x shape : (150, 2)
y shape : (150,)
%% Cell type:markdown id: tags:
### 2.2 - Train and test sets
%% Cell type:code id: tags:
``` python
x,y = pwk.shuffle_np_dataset(x, y)
n=int(len(x)*0.8)
x_train = x[:n]
y_train = y[:n]
x_test = x[n:]
y_test = y[n:]
print(f'x_train shape : {x_train.shape}')
print(f'y_train shape : {y_train.shape}')
print(f'x_test shape : {x_test.shape}')
print(f'y_test shape : {y_test.shape}')
```
%% Output
x_train shape : (120, 2)
y_train shape : (120,)
x_test shape : (30, 2)
y_test shape : (30,)
%% Cell type:markdown id: tags:
## Step 3 - Get a perceptron, and train it
%% Cell type:code id: tags:
``` python
pct = Perceptron(max_iter=100, random_state=82, tol=0.01, verbose=1)
pct.fit(x_train, y_train)
```
%% Output
-- Epoch 1
Norm: 1.56, NNZs: 2, Bias: 3.000000, T: 120, Avg. loss: 0.176917
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 1.56, NNZs: 2, Bias: 3.000000, T: 240, Avg. loss: 0.000000
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 1.56, NNZs: 2, Bias: 3.000000, T: 360, Avg. loss: 0.000000
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 1.56, NNZs: 2, Bias: 3.000000, T: 480, Avg. loss: 0.000000
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 1.56, NNZs: 2, Bias: 3.000000, T: 600, Avg. loss: 0.000000
Total training time: 0.00 seconds.
-- Epoch 6
Norm: 1.56, NNZs: 2, Bias: 3.000000, T: 720, Avg. loss: 0.000000
Total training time: 0.00 seconds.
-- Epoch 7
Norm: 1.56, NNZs: 2, Bias: 3.000000, T: 840, Avg. loss: 0.000000
Total training time: 0.00 seconds.
Convergence after 7 epochs took 0.00 seconds
Perceptron(max_iter=100, random_state=82, tol=0.01, verbose=1)
%% Cell type:markdown id: tags:
## Step 4 - Predictions
%% Cell type:code id: tags:
``` python
y_pred = pct.predict(x_test)
df=pd.DataFrame.from_dict({'Length (x1)':x_test[:,0], 'Width (x2)':x_test[:,1], 'y_test':y_test, 'y_pred':y_pred})
display(df[:15])
```
%% Output
%% Cell type:markdown id: tags:
## Step 5 - Visualisation
%% Cell type:code id: tags:
``` python
def plot_perceptron(x_train,y_train,x_test,y_test):
a = -pct.coef_[0][0] / pct.coef_[0][1]
b = -pct.intercept_ / pct.coef_[0][1]
box=[x.min(axis=0)[0],x.max(axis=0)[0],x.min(axis=0)[1],x.max(axis=0)[1]]
mx=(box[1]-box[0])/20
my=(box[3]-box[2])/20
box=[box[0]-mx,box[1]+mx,box[2]-my,box[3]+my]
fig, axs = plt.subplots(1, 1)
fig.set_size_inches(10,6)
    axs.plot(x_train[y_train==1, 0], x_train[y_train==1, 1], "o", color='tomato', label="Iris-Setosa")
    axs.plot(x_train[y_train==0, 0], x_train[y_train==0, 1], "o", color='steelblue',label="Others")
    axs.plot(x_test[y_pred==1, 0], x_test[y_pred==1, 1], "o", color='lightsalmon', label="Iris-Setosa (pred)")
    axs.plot(x_test[y_pred==0, 0], x_test[y_pred==0, 1], "o", color='lightblue', label="Others (pred)")
axs.plot([box[0], box[1]], [a*box[0]+b, a*box[1]+b], "k--", linewidth=2)
axs.set_xlabel("Petal length (cm)", labelpad=15) #, fontsize=14)
axs.set_ylabel("Petal width (cm)", labelpad=15) #, fontsize=14)
axs.legend(loc="lower right", fontsize=14)
axs.set_xlim(box[0],box[1])
axs.set_ylim(box[2],box[3])
pwk.save_fig('01-perceptron-iris')
plt.show()
plot_perceptron(x_train,y_train, x_test,y_test)
```
%% Output
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Wednesday 16 December 2020, 21:02:00
Duration is : 00:00:01 537ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [LINR1] - Linear regression with direct resolution
<!-- DESC --> Low-level implementation, using numpy, of a direct resolution for a linear regression
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Just one, the illustration of a direct resolution :-)
## What we're going to do :
Equation : $$Y = X.\theta + N$$
Where $N$ is a noise vector
and $\theta = (a,b)$ is the parameter vector, such that $y = a.x + b$
%% Cell type:markdown id: tags:
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
import numpy as np
import math
import matplotlib
import matplotlib.pyplot as plt
import sys
sys.path.append('..')
import fidle.pwk as pwk
datasets_dir = pwk.init('LINR1')
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('LINR1')
```
%% Output
**FIDLE 2020 - Practical Work Module**
Version : 0.6.1 DEV
Notebook id : LINR1
Run time : Thursday 17 December 2020, 16:30:24
TensorFlow version : 2.1.0
Keras version : 2.2.4-tf
Datasets dir : /gpfswork/rech/mlh/uja62cb/datasets
Running mode : full
Update keras cache : False
%% Cell type:markdown id: tags:
## Step 2 - Retrieve a set of points
%% Cell type:code id: tags:
``` python
# ---- Parameters
nb    = 100     # number of points
xmin  = 0       # x distribution
xmax  = 10
a     = 4       # intercept
b     = 2       # slope :  y = a + b.x (+ noise)
noise = 7       # noise level

theta = np.array([[a],[b]])

# ---- Vector X : nb rows of (1, x)
#      the first column is set to 1 so that X.theta <=> 1.a + x.b
Xc1 = np.ones((nb,1))
Xc2 = np.random.uniform(xmin,xmax,(nb,1))
X = np.c_[ Xc1, Xc2 ]

# ---- Noise
#      N = np.random.uniform(-noise,noise,(nb,1))
N = noise * np.random.normal(0,1,(nb,1))

# ---- Vector Y
Y = (X @ theta) + N

# print("X:\n",X,"\nY:\n ",Y)
```
%% Cell type:markdown id: tags:
### Show it
%% Cell type:code id: tags:
``` python
width = 12
height = 6
fig, ax = plt.subplots()
fig.set_size_inches(width,height)
ax.plot(X[:,1], Y, ".")
ax.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
ax.set_xlabel('x axis')
ax.set_ylabel('y axis')
pwk.save_fig('01-set_of_points')
plt.show()
```
%% Output
%% Cell type:markdown id: tags:
## Step 3 - Direct calculation of the normal equation
We'll try to find an optimal value of $\theta$ by minimizing a cost function.
The cost function classically used for linear regression is the **root mean square error** (RMSE):

$$RMSE(X,h_\theta)=\sqrt{\frac1n\sum_{i=1}^n\left[h_\theta(X^{(i)})-Y^{(i)}\right]^2}$$

With the simplified variant : $$MSE(X,h_\theta)=\frac1n\sum_{i=1}^n\left[h_\theta(X^{(i)})-Y^{(i)}\right]^2$$

The optimal value of the regression is : $$\hat{ \theta } =( X^{T} .X)^{-1}.X^{T}.Y$$

Derivation : https://eli.thegreenplace.net/2014/derivation-of-the-normal-equation-for-linear-regression
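In a nutshell, a sketch of that derivation (see the link above for the details): writing the cost as $MSE(\theta)=\frac1n\lVert X.\theta-Y\rVert^2$ and setting its gradient with respect to $\theta$ to zero gives

$$\nabla_\theta MSE=\frac2n X^T(X.\theta-Y)=0 \;\Longrightarrow\; X^TX\,\hat\theta=X^TY \;\Longrightarrow\; \hat\theta=(X^TX)^{-1}X^TY$$

provided that $X^TX$ is invertible.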
%% Cell type:code id: tags:
``` python
theta_hat = np.linalg.inv(X.T @ X) @ X.T @ Y
print("Theta :\n",theta,"\n\ntheta hat :\n",theta_hat)
```
%% Output
Theta :
[[4]
[2]]
theta hat :
[[6.57350113]
[1.53229674]]
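%% Cell type:markdown id: tags:

As a cross-check (not part of the original notebook), the same solution can be obtained with NumPy's least-squares solver, which is also numerically safer than forming $(X^TX)^{-1}$ explicitly:

%% Cell type:code id: tags:

``` python
# ---- Least-squares solution of X @ theta ≈ Y ; should match theta_hat above
theta_lstsq, residuals, rank, sv = np.linalg.lstsq(X, Y, rcond=None)
print("theta (lstsq) :\n", theta_lstsq)
```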
%% Cell type:markdown id: tags:
### Show it
%% Cell type:code id: tags:
``` python
Xd = np.array([[1,xmin], [1,xmax]])
Yd = Xd @ theta_hat
fig, ax = plt.subplots()
fig.set_size_inches(width,height)
ax.plot(X[:,1], Y, ".")
ax.plot(Xd[:,1], Yd, "-")
ax.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
ax.set_xlabel('x axis')
ax.set_ylabel('y axis')
pwk.save_fig('02-regression-line')
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Thursday 17 December 2020, 16:30:38
Duration is : 00:00:13 289ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>