18/11/19: maintenance de la plate-forme - Interruptions du service gricad-gitlab et perturbations possibles tout au long de la journée.

Commit 56957cb7 authored by Cyril Labbe's avatar Cyril Labbe

Utils package for the set of distances and the set of thresholds

	modified:   src/fr/imag/forge/Scidetect/Checker/Classifier.java
	modified:   src/fr/imag/forge/Scidetect/Checker/DistantCalculator.java
	new file:   src/fr/imag/forge/Scidetect/Checker/Utils/DistancesSet.java
	new file:   src/fr/imag/forge/Scidetect/Checker/Utils/ThresholdsSet.java
	modified:   src/fr/imag/forge/Scidetect/TextExtractor/Xmlextractor.java
	modified:   src/fr/imag/forge/Scidetect/scigenchecker_local/ScigenChecker_Local.java
parent b314ad44
......@@ -24,41 +24,30 @@ import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import fr.imag.forge.Scidetect.Checker.Utils.DistancesSet;
import fr.imag.forge.Scidetect.Checker.Utils.ThresholdsSet;
/**
* Classifier tags input files as being of a certain class. Examples of classes are SCIgen, Mathgen,...
* The decision is made according to the distance between the tested file and its nearest neighbor
* in each class. Thresholds for assignation are read in file specified in the configuration file
* in each class. Thresholds for assigning are read in file specified in the configuration file
* (default config.txt).
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.frs
*/
public class Classifier {
/**
* The key is the class name and value is a couple of Double.
* This couple is composed of a threshold for quasi-certain classification and another for suspicion.
*/
HashMap<String, Double[]> Threshold = new HashMap<String, Double[]>();
/**
* Reads threshold in the configuration file (default config.txt).
* @throws FileNotFoundException
* @throws IOException
*/
public void readconfig() throws FileNotFoundException, IOException {
File conf = new File("config.txt");
BufferedReader br = new BufferedReader(new FileReader(conf));
String line;
while ((line = br.readLine()) != null) {
if (line.startsWith("Threshold_")) {
// System.out.println(line);
String[] b = line.split("\t");
Double[] temp = new Double[2];
temp[0] = Double.parseDouble(b[1]);
temp[1] = Double.parseDouble(b[2]);
Threshold.put(b[0].substring(10, b[0].length()), temp);
//10 because i want to cut Threshold_
}
}
}
private ThresholdsSet SetOfThresholds;
/**
* @throws Exception
*
*/
public Classifier() {
this.SetOfThresholds = new ThresholdsSet();
this.SetOfThresholds.Init();
}
/**
* Classify is classifying each document given the matrix of distances (distant).
* For each entry it gives the class (or more) to which the text can be assigned
......@@ -66,11 +55,11 @@ public class Classifier {
* @return the assigned class
* @throws IOException
*/
public String classify(HashMap<String, HashMap<String, Double>> distant) throws IOException {
public String classify(DistancesSet distant) throws IOException {
String result = "";
String conclusion = "";
readconfig();
for (String key : distant.keySet()) {
//for each file in the test
result = find_NN(distant.get(key));
......@@ -107,10 +96,10 @@ public class Classifier {
//System.out.println(eachtype[i]);
//get threshold for the corresponding type
Double[] threshold = new Double[2];
if (Threshold.containsKey(eachNN[0])) {
threshold = Threshold.get(eachNN[0]);
if (SetOfThresholds.containsKey(eachNN[0])) {
threshold = SetOfThresholds.get(eachNN[0]);
} else {
threshold = Threshold.get("Default");
threshold = SetOfThresholds.get("Default");
}
//check distant with threshold
if (Double.parseDouble(eachNN[1]) < threshold[0]) {
......
......@@ -20,26 +20,28 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import fr.imag.forge.Scidetect.Checker.Utils.DistancesSet;
/**
*
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
*/
public class DistantCalculator {
private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
//private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
private DistancesSet distant = new DistancesSet();
public HashMap<String, HashMap<String, Double>> caldistant(HashMap<String, HashMap<String, Integer>> samples, HashMap<String, HashMap<String, Integer>> tests) {
public DistancesSet caldistant(HashMap<String, HashMap<String, Integer>> samples, HashMap<String, HashMap<String, Integer>> tests) {
for (String key : tests.keySet()) {
HashMap<String, Double> distantto = new HashMap<String, Double>();
//HashMap<String, Double> distantto = new HashMap<String, Double>();
for (String key2 : samples.keySet()) {
double distanttt = cal_textdistant(tests.get(key),
samples.get(key2));
double distanttt = cal_textdistant(tests.get(key),samples.get(key2));
// System.out.println("distant between " + key + " and " + key2
// + ": " + distanttt);
distantto.put(key2, distanttt);
//distantto.put(key2, distanttt);
distant.setDist(key,key2,distanttt);
}
distant.put(key, distantto);
//distant.put(key, distantto);
}
return distant;
......
package fr.imag.forge.Scidetect.Checker.Utils;
import java.util.HashMap;
/**
 * Stores distances between pairs of texts as a two-level map.
 * The outer key is the first text (A), the inner key is the second text (B),
 * and the value is the distance recorded for the ordered pair (A, B).
 * Setting the distance for (A, B) does not set it for (B, A).
 */
public class DistancesSet extends HashMap<String, HashMap<String, Double>> {

    /**
     * Creates an empty set of distances.
     */
    public DistancesSet() {
        super();
    }

    /**
     * Gets the distance recorded between A and B.
     *
     * @param A the first text (outer key)
     * @param B the second text (inner key)
     * @return the distance between A and B, or {@code null} if no distance
     *         has been recorded for that pair
     */
    public Double getDist(String A, String B) {
        HashMap<String, Double> row = this.get(A);
        // No row for A means nothing was ever recorded for it: answer null,
        // exactly as for a missing B, instead of failing with a NullPointerException.
        if (row == null) {
            return null;
        }
        return row.get(B);
    }

    /**
     * Sets the distance between A and B to the value d, creating the row
     * for A on first use and overwriting any previous value for the pair.
     *
     * @param A the first text (outer key)
     * @param B the second text (inner key)
     * @param d the distance to record
     */
    public void setDist(String A, String B, Double d) {
        HashMap<String, Double> row = this.get(A);
        if (row == null) {
            row = new HashMap<String, Double>();
            this.put(A, row);
        }
        row.put(B, d);
    }
}
package fr.imag.forge.Scidetect.Checker.Utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
/**
 * Set of classification thresholds read from the configuration file.
 * The key is the class name (the text following "Threshold_" in the file)
 * and the value is a couple of Double taken from the second and third
 * tab-separated columns of the line. As described in Classifier, the first
 * is the threshold for quasi-certain classification and the second the
 * threshold for suspicion.
 */
public class ThresholdsSet extends HashMap<String, Double[]> {

    /** Prefix marking a threshold line in the configuration file. */
    private static final String PREFIX = "Threshold_";

    /**
     * Creates an empty set of thresholds; call {@link #Init()} to load it.
     */
    public ThresholdsSet() {
        super();
    }

    /**
     * Initializes the thresholds set by reading the configuration file.
     * Never throws: any problem while reading or parsing is reported on
     * standard output, and entries read before the problem are kept.
     */
    public void Init() {
        try {
            this.readconfig();
        } catch (Exception E) {
            System.out.println("*****");
            System.out.println("***** Problem when reading Thresholds values");
            System.out.println("***** Please check the config.txt file ");
            // Report the cause too, so a missing file, a malformed number and
            // an absent Threshold_ line can be told apart.
            System.out.println("***** Cause: " + E);
            System.out.println("*****");
        }
    }

    /**
     * Reads thresholds from the configuration file (config.txt in the
     * current working directory). Only lines starting with "Threshold_"
     * are used; they are expected to hold three tab-separated columns:
     * Threshold_&lt;class&gt;, first value, second value.
     *
     * @throws Exception if the file cannot be read, a threshold line is
     *         malformed, or no threshold line is found at all
     */
    private void readconfig() throws Exception {
        File conf = new File("config.txt");
        BufferedReader br = new BufferedReader(new FileReader(conf));
        boolean foundClass = false;
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.startsWith(PREFIX)) {
                    foundClass = true;
                    String[] b = line.split("\t");
                    Double[] temp = new Double[2];
                    temp[0] = Double.parseDouble(b[1]);
                    temp[1] = Double.parseDouble(b[2]);
                    // Strip the "Threshold_" prefix to obtain the class name.
                    this.put(b[0].substring(PREFIX.length()), temp);
                }
            }
        } finally {
            // Close the reader even when a line fails to parse.
            br.close();
        }
        if (!foundClass) {
            throw new Exception("no line starting with \"" + PREFIX + "\" found in config.txt");
        }
    }
}
......@@ -62,11 +62,11 @@ public class Xmlextractor {
//return text;
}
/**
* <#Description#>
* Extract txt from xml
*
* @param xml <#xml description#>
* @param xml xml File
*
* @return <#return value description#>
* @return extracted content
*/
public String xmlextract(File xml) throws IOException {
try {
......
......@@ -20,8 +20,10 @@ import fr.imag.forge.Scidetect.Checker.Classifier;
import fr.imag.forge.Scidetect.Checker.DistantCalculator;
import fr.imag.forge.Scidetect.Checker.Indexer;
import fr.imag.forge.Scidetect.Checker.Reader;
import fr.imag.forge.Scidetect.Checker.Utils.DistancesSet;
import fr.imag.forge.Scidetect.Logger.Log;
import fr.imag.forge.Scidetect.TextExtractor.pdfextractor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
......@@ -47,7 +49,8 @@ public class ScigenChecker_Local {
private HashMap<String, HashMap<String, Integer>> samples = new HashMap<String, HashMap<String, Integer>>();
private HashMap<String, HashMap<String, Integer>> tests = new HashMap<String, HashMap<String, Integer>>();
private String SamplesFolder;
private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
//private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
DistancesSet distant = new DistancesSet();
private Boolean savedetaillog = false;
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment