Commit bc5c8914 authored by Tien's avatar Tien

avoided misclassification Scigen-Physgen

parent 45f7cbea
......@@ -28,30 +28,33 @@ import fr.imag.forge.scidetect.Checker.Utils.DistancesSet;
import fr.imag.forge.scidetect.Checker.Utils.ThresholdsSet;
/**
* Classifier is tagging input files has being of a certain class. Example of classes are SCIgen, Mathgen,...
* The decision is made according to the distance between the tested file and its nearest neighbor
* in each class. Thresholds for assigning are read in file specified in the configuration file
* Classifier tags input files as belonging to a certain class. Examples of
* classes are SCIgen, Mathgen, ... The decision is made according to the
* distance between the tested file and its nearest neighbor in each class.
* Thresholds for assignment are read from the file specified in the
* configuration file (default config.txt).
*
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.frs
*/
public class Classifier {
private ThresholdsSet SetOfThresholds;
/**
* @throws Exception
*
*/
public Classifier() {
this.SetOfThresholds = new ThresholdsSet();
this.SetOfThresholds.Init();
}
private ThresholdsSet SetOfThresholds;
/**
* Creates a classifier and initializes its set of thresholds.
* <p>
* A new ThresholdsSet is instantiated and its Init() method is called.
* NOTE(review): Init() presumably loads the thresholds from the file named
* in the configuration - confirm in ThresholdsSet. The constructor declares
* no checked exception.
*/
public Classifier() {
this.SetOfThresholds = new ThresholdsSet();
this.SetOfThresholds.Init();
}
/**
* Classify is classifying each document given the matrix of distances (distant).
* For each entry it gives the class (or more) to which the text can be assigned
* @param distant is a matrix of distances
* Classifies each document given the matrix of distances (distant). For
* each entry it gives the class (or classes) to which the text can be
* assigned.
*
* @param distant is a matrix of distances
* @return the assigned class
* @throws IOException
*/
......@@ -59,7 +62,7 @@ public class Classifier {
String result = "";
String conclusion = "";
for (String key : distant.keySet()) {
//for each file in the test
result = find_NN(distant.get(key));
......@@ -82,9 +85,12 @@ public class Classifier {
return conclusion;
}
/**
* Check if the distance is lower, between of upper the two threshold.
* @param result a string composed having for each classes the value of its NN
* Checks whether the distance is below, between, or above the two thresholds.
*
* @param result a string containing, for each class, the value of its
* NN
* @return
*/
private String checkdistant(String result) {
......@@ -96,6 +102,7 @@ public class Classifier {
//System.out.println(eachtype[i]);
//get threshold for the corresponding type
Double[] threshold = new Double[2];
if (SetOfThresholds.containsKey(eachNN[0])) {
threshold = SetOfThresholds.get(eachNN[0]);
} else {
......@@ -114,7 +121,10 @@ public class Classifier {
}
return conclution;
}
/**
* In case it is not a generation, find the closest text to it in general
*
* @param result
* @return
*/
......@@ -132,10 +142,12 @@ public class Classifier {
}
return conclu;
}
/**
*
* @param indexpath
* @return
* Get the type of the generation based on the path to it
*
* @param indexpath path to the NN
* @return type of generation (parent folder)
*/
private String gettype(String indexpath) {
File indexfile = new File(indexpath);
......@@ -146,6 +158,8 @@ public class Classifier {
}
/**
* Get the nearest neighbor in each type of generation
*
* @param distantto
* @return
*/
......@@ -166,6 +180,14 @@ public class Classifier {
}
}
//remove either Physgen or Scigen to avoid misclassification
if (distotype.containsKey("Scigen") && distotype.containsKey("Physgen")) {
if (distotype.get("Scigen") < distotype.get("Physgen")) {
distotype.remove("Physgen");
} else {
distotype.remove("Scigen");
}
}
// it returns the path to the NN
String result = "";
for (String key : distotype.keySet()) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment