Commit d000b9e9 authored by Tien's avatar Tien

Fixed clean function

parent f3296cb9
this is the detail log folder
this is the logs folder
application.args=-c /home/tien/Test_demo -d
application.args=-c /home/tien/Test_demo/xml/2978-3-319-20608-0_Book_PrintPDF.pdf -d -noclean
compile.on.save=true
do.depend=false
do.jar=true
......
......@@ -68,7 +68,7 @@ jnlp.signed=false
jnlp.signing=
jnlp.signing.alias=
jnlp.signing.keystore=
main.class=fr.imag.forge.scidetect.SciDetect_local.Scidetect_Local
main.class=fr.imag.forge.scidetect.SciDetect_local.SciDetect_Local
# Optional override of default Codebase manifest attribute, use to prevent RIAs from being repurposed
manifest.custom.codebase=
# Optional override of default Permissions manifest attribute (supported values: sandbox, all-permissions)
......
......@@ -105,9 +105,9 @@ public class Classifier {
}
//check distant with threshold
if (Double.parseDouble(eachNN[1]) < threshold[0]) {
conclusion += "is a " + eachNN[0] + "\t" + eachNN[1] + "\t" + eachNN[2] + "\n";
conclusion += eachNN[0] + "\t" + eachNN[1] + "\t" + eachNN[2] + "\n";
} else if (Double.parseDouble(eachNN[1]) < threshold[1]) {
conclusion += "is suppected " + eachNN[0] + "\t" + eachNN[1] + "\t" + eachNN[2] + "\n";
conclusion += "Suppected " + eachNN[0] + "\t" + eachNN[1] + "\t" + eachNN[2] + "\n";
}
}
......@@ -132,7 +132,7 @@ public class Classifier {
String[] eachNN = eachtype[i].split("\t");
if (Double.parseDouble(eachNN[1]) < mindistant) {
mindistant = Double.parseDouble(eachNN[1]);
conclu = "is Genuine \t" + eachNN[1] + "\t" + eachNN[2] + "\n";
conclu = "Genuine \t" + eachNN[1] + "\t" + eachNN[2] + "\n";
}
}
return conclu;
......
......@@ -40,7 +40,7 @@ public class Cleaner {
String originalname = listOfFile[j].getName();
String originalnamewoextension = listOfFile[j].getName().substring(0, listOfFile[j].getName().lastIndexOf("."));
for (int i = 0; i < listOfFile.length; i++) {
if (listOfFile[i].getName().contains(originalnamewoextension) && !listOfFile[i].getName().contains(originalname)) {
if (listOfFile[i].getName().contains(originalnamewoextension) && !listOfFile[i].getName().contains(originalname)&&!listOfFile[i].getName().contains(".pdf.xtx")) {
listOfFile[i].delete();
}
}
......@@ -52,7 +52,7 @@ public class Cleaner {
File[] listOfFile = folder.getParentFile().listFiles();
String originalnamewoextension = folder.getName().substring(0, folder.getName().lastIndexOf("."));
for (int i = 0; i < listOfFile.length; i++) {
if (listOfFile[i].getName().contains(originalnamewoextension) && !listOfFile[i].getName().contains(originalname)) {
if (listOfFile[i].getName().contains(originalnamewoextension) && !listOfFile[i].getName().contains(originalname)&&!listOfFile[i].getName().contains(".pdf.xtx")) {
listOfFile[i].delete();
}
}
......
......@@ -40,10 +40,10 @@ public class Reader {
private Corpus samples = new Corpus();
private Corpus test = new Corpus();
private String SamplesFolder;
/**
*Read config file
* Read config file
*
* @throws FileNotFoundException
* @throws IOException
*/
......@@ -64,7 +64,7 @@ public class Reader {
//maxlength = Integer.parseInt(b[1]);
TextProcessor.maxlength = Integer.parseInt(b[1]);
}
if (b[0].equals("Min_length")) {
if (b[0].equals("Min_length")) {
//maxlength = Integer.parseInt(b[1]);
TextProcessor.minlength = Integer.parseInt(b[1]);
}
......@@ -74,7 +74,8 @@ public class Reader {
}
/**
*Read the sample folder
* Read the sample folder
*
* @param foldername
* @return sample corpus
* @throws IOException
......@@ -85,7 +86,7 @@ public class Reader {
for (int j = 0; j < listOfFile.length; j++) {
if (listOfFile[j].isDirectory()) {
readsamples(listOfFile[j].getPath());
} else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") ||listOfFile[j].getName().endsWith(".xtx")||(listOfFile[j].getName().endsWith(".txt") && !listOfFile[j].getName().startsWith("INDEX-"))) {
} else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx") || (listOfFile[j].getName().endsWith(".txt") && !listOfFile[j].getName().startsWith("INDEX-"))) {
ArrayList<Text> text = new ArrayList<Text>();
TextProcessor textprocessor = new TextProcessor();
text = textprocessor.newtext(listOfFile[j], listOfFile);
......@@ -100,26 +101,40 @@ public class Reader {
}
/**
*Read the test folder
* Read the test folder
*
* @param foldername
* @return test corpus
* @throws IOException
*/
public Corpus readtests(String foldername) throws IOException {
File folder = new File(foldername);
File[] listOfFile = folder.listFiles();
for (int j = 0; j < listOfFile.length; j++) {
if (listOfFile[j].isDirectory()) {
readtests(listOfFile[j].getPath());
} else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") ||listOfFile[j].getName().endsWith(".xtx")) {
ArrayList<Text> text = new ArrayList<Text>();
TextProcessor textprocessor = new TextProcessor();
text = textprocessor.newtext(listOfFile[j], listOfFile);
for (int i = 0; i < text.size(); i++) {
test.put(text.get(i));
if (folder.isDirectory()) {
File[] listOfFile = folder.listFiles();
for (int j = 0; j < listOfFile.length; j++) {
if (listOfFile[j].isDirectory()) {
readtests(listOfFile[j].getPath());
} else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) {
ArrayList<Text> text = new ArrayList<Text>();
TextProcessor textprocessor = new TextProcessor();
text = textprocessor.newtext(listOfFile[j], listOfFile);
for (int i = 0; i < text.size(); i++) {
test.put(text.get(i));
}
}
}
}
} else if (folder.getName().endsWith(".pdf") || folder.getName().endsWith(".xml") || folder.getName().endsWith(".xtx")) {
ArrayList<Text> text = new ArrayList<Text>();
TextProcessor textprocessor = new TextProcessor();
File[] listOfFile = folder.getParentFile().listFiles();
//listOfFile[0] = folder;
text = textprocessor.newtext(folder, listOfFile);
for (int i = 0; i < text.size(); i++) {
test.put(text.get(i));
}
}
return test;
......
......@@ -63,6 +63,7 @@ public class TextProcessor {
original.getName().lastIndexOf("."))
+ ".txt";
String content = "";
if (Arrays.asList(listOfFile).toString().contains(indexname)) {
// System.out.println("lets read from index file");
readindexfile(original.getParent() + "/" + indexname);
......
......@@ -53,7 +53,7 @@ public class Log {
if (!dloglocation.exists()) {
dloglocation.mkdir();
}
File distantout = new File(detailloglocation + logtime + ".xls");
File distantout = new File(detailloglocation + logtime + ".tsv");
//File distantout = new File(testpath+"/alldistant.xls");
PrintWriter out;
try {
......@@ -86,7 +86,7 @@ public class Log {
if (!location.exists()) {
location.mkdir();
}
distantout = new File(loglocation + logtime + ".xls");
distantout = new File(loglocation + logtime + ".tsv");
}
PrintWriter out;
......
......@@ -17,6 +17,7 @@
package fr.imag.forge.scidetect.SciDetect_local;
import fr.imag.forge.scidetect.Checker.Classifier;
import fr.imag.forge.scidetect.Checker.Cleaner;
import fr.imag.forge.scidetect.Checker.DistantCalculator;
import fr.imag.forge.scidetect.Checker.Reader;
import fr.imag.forge.scidetect.Checker.Utils.DistancesSet;
......@@ -39,7 +40,7 @@ import java.util.Date;
public class SciDetect_Local {
// private String loglocation;
// private String detailloglocation;
// private String detailloglocation;
private String testpath;
//private String logtime;
private Corpus samples = new Corpus();
......@@ -48,11 +49,12 @@ public class SciDetect_Local {
//private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
DistancesSet distant = new DistancesSet();
private Boolean savedetaillog = false;
private Boolean clean = true;
/**
* Read in the config file:
*- places where to find samples of each class
*- default places where to write results.
* Read in the config file: - places where to find samples of each class -
* default places where to write results.
*
* @throws FileNotFoundException
* @throws IOException
*/
......@@ -80,16 +82,18 @@ public class SciDetect_Local {
}
}
/**
* This should be where all the components be called,
* It can be used as an interface for a stand alone SciDetect API library.
* This is where all the components are called. It can be used as an
* interface for a stand-alone SciDetect API library.
*
* @throws IOException
*/
public void compute(String[] args) throws IOException {
readconfig();
readargs(args);
if (testpath != null) {
DateFormat dateFormat = new SimpleDateFormat("HH:mm dd.MM.yyyy");
DateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd-HH:mm");
Date date = new Date();
Log.logtime = dateFormat.format(date);
try {
......@@ -115,18 +119,22 @@ public class SciDetect_Local {
System.out.println(conclusion);
Log log = new Log();
log.savelog(conclusion);
if (clean){
Cleaner cleaner = new Cleaner();
cleaner.clean(testpath);}
if (savedetaillog) {
log.savedetaillog(distant);
}
} else {
System.out.println("***** Can not read path to the folder:"+testpath);
System.out.println("***** Can not read path to the folder:" + testpath);
System.out.println("***** The folder should contains file to check");
}
}
/**
* Parsing of the command line arguments:
* where to find pdf files, where results should be written
* Parsing of the command line arguments: where to find pdf files, where
* results should be written
*
* @param args
*/
public void readargs(String[] args) {
......@@ -141,13 +149,17 @@ public class SciDetect_Local {
}
if (args[i].equals("-d")) {
savedetaillog = true;
}
if (args[i].equals("-noclean")) {
clean = false;
}
if (args[i].equals("-h")) {
printUsage();
}
}
} else
{printUsage();}
} else {
printUsage();
}
}
/**
......@@ -161,14 +173,15 @@ public class SciDetect_Local {
System.out.println("java -jar SciDetect_local.jar -h \n");
System.out.println("***** \n");
}
/**
* This is the standalone checker. All pdf files in the dir specified after -c are
* checked against classes found in the dir "data". Results are written in the log
* file specified by the -l option. If -d is given a detailled log is produced.
* Example: testing all pdf files in a directory MyConf/PDF/ and having results
* in the MyConf/checklog.txt:
* java -jar ScigenChecker_local.jar -l MyConf/checklog.txt -c MyConf/PDF/
* This is the standalone checker. All pdf files in the dir specified after
* -c are checked against classes found in the dir "data". Results are
* written in the log file specified by the -l option. If -d is given a
* detailed log is produced. Example: testing all pdf files in a directory
* MyConf/PDF/ and having results in the MyConf/checklog.txt: java -jar
* ScigenChecker_local.jar -l MyConf/checklog.txt -c MyConf/PDF/
*
* @param args the command line arguments
* @throws java.io.IOException
*/
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment