Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Cyril Labbe
scidetect
Commits
bc5c8914
Commit
bc5c8914
authored
Mar 10, 2015
by
Tien
Browse files
avoided misclassification Scigen-Physgen
parent
45f7cbea
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/fr/imag/forge/scidetect/Checker/Classifier.java
View file @
bc5c8914
...
...
@@ -28,30 +28,33 @@ import fr.imag.forge.scidetect.Checker.Utils.DistancesSet;
import
fr.imag.forge.scidetect.Checker.Utils.ThresholdsSet
;
/**
* Classifier tags input files as belonging to a certain class. Examples of classes are SCIgen, Mathgen,...
* The decision is made according to the distance between the tested file and its nearest neighbor
* in each class. Thresholds for assignment are read from the file specified in the configuration file
* Classifier tags input files as belonging to a certain class. Examples of
* classes are SCIgen, Mathgen,... The decision is made according to the
* distance between the tested file and its nearest neighbor in each class.
* Thresholds for assignment are read from the file specified in the configuration file
* (default config.txt).
*
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.frs
*/
public
class
Classifier
{
/** Thresholds that checkdistant looks up by key (containsKey / get). */
private ThresholdsSet SetOfThresholds;

/**
 * Creates a classifier with a freshly initialised set of thresholds.
 *
 * A new ThresholdsSet is built and its Init() method is invoked before the
 * set is stored on this instance. The constructor declares no checked
 * exception and catches nothing, so any runtime failure raised by Init()
 * propagates to the caller.
 */
public Classifier() {
    // Build and initialise the set first, then publish it on the instance.
    ThresholdsSet thresholds = new ThresholdsSet();
    thresholds.Init();
    this.SetOfThresholds = thresholds;
}
/** Thresholds that checkdistant looks up by key (containsKey / get). */
private ThresholdsSet SetOfThresholds;

/**
 * Creates a classifier with a freshly initialised set of thresholds.
 *
 * A new ThresholdsSet is built and its Init() method is invoked before the
 * set is stored on this instance. The constructor declares no checked
 * exception and catches nothing, so any runtime failure raised by Init()
 * propagates to the caller.
 */
public Classifier() {
    // Build and initialise the set first, then publish it on the instance.
    ThresholdsSet thresholds = new ThresholdsSet();
    thresholds.Init();
    this.SetOfThresholds = thresholds;
}
/**
* Classifies each document given the matrix of distances (distant).
* For each entry it gives the class (or classes) to which the text can be assigned.
* @param distant is a matrix of distances
* Classifies each document given the matrix of distances
* (distant). For each entry it gives the class (or classes) to which the text
* can be assigned.
*
* @param distant is a matrix of distances
* @return the assigned class
* @throws IOException
*/
...
...
@@ -59,7 +62,7 @@ public class Classifier {
String
result
=
""
;
String
conclusion
=
""
;
for
(
String
key
:
distant
.
keySet
())
{
//for each file in the test
result
=
find_NN
(
distant
.
get
(
key
));
...
...
@@ -82,9 +85,12 @@ public class Classifier {
return
conclusion
;
}
/**
* Check whether the distance is below, between, or above the two thresholds.
* @param result a string holding, for each class, the value of its NN
* Check whether the distance is below, between, or above the two thresholds.
*
* @param result a string holding, for each class, the value of its
* NN
* @return
*/
private
String
checkdistant
(
String
result
)
{
...
...
@@ -96,6 +102,7 @@ public class Classifier {
//System.out.println(eachtype[i]);
//get threshold for the corresponding type
Double
[]
threshold
=
new
Double
[
2
];
if
(
SetOfThresholds
.
containsKey
(
eachNN
[
0
]))
{
threshold
=
SetOfThresholds
.
get
(
eachNN
[
0
]);
}
else
{
...
...
@@ -114,7 +121,10 @@ public class Classifier {
}
return
conclution
;
}
/**
* In case it is not a generation, find the closest text to it in general
*
* @param result
* @return
*/
...
...
@@ -132,10 +142,12 @@ public class Classifier {
}
return
conclu
;
}
/**
*
* @param indexpath
* @return
* Get the type of the generation based on the path to it
*
* @param indexpath path to the NN
* @return type of generation (parent folder)
*/
private
String
gettype
(
String
indexpath
)
{
File
indexfile
=
new
File
(
indexpath
);
...
...
@@ -146,6 +158,8 @@ public class Classifier {
}
/**
* Get the nearest neighbor in each type of generation
*
* @param distantto
* @return
*/
...
...
@@ -166,6 +180,14 @@ public class Classifier {
}
}
// keep only the closer of Scigen and Physgen to avoid misclassification between them
if
(
distotype
.
containsKey
(
"Scigen"
)
&&
distotype
.
containsKey
(
"Physgen"
))
{
if
(
distotype
.
get
(
"Scigen"
)
<
distotype
.
get
(
"Physgen"
))
{
distotype
.
remove
(
"Physgen"
);
}
else
{
distotype
.
remove
(
"Scigen"
);
}
}
// it returns the path to the NN
String
result
=
""
;
for
(
String
key
:
distotype
.
keySet
())
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment