18/11/19: maintenance de la plate-forme - Interruptions du service gricad-gitlab et perturbations possibles tout au long de la journée.

Commit 06786a2e authored by Cyril Labbe's avatar Cyril Labbe

Committer: Cyril Labbe

parent 94700418
......@@ -11,7 +11,7 @@ JAVACLASSPATH = -cp lib/pdfbox-app-1.8.8.jar
default: all
all: classes doc jar
all: classes doc jar run
classes:
mkdir -p classes
......@@ -24,5 +24,8 @@ jar:
cd classes ; jar -cfvm ../ScigenChecker_Local`date +%Y-%m-%d`.jar ../MANIFEST.MF *; cd ..
cp ScigenChecker_Local`date +%Y-%m-%d`.jar ScigenChecker_Local.jar
# Run the checker on the Test directory. The jar name must match, case included,
# the file copied by the jar target (ScigenChecker_Local.jar).
run:
java -jar ScigenChecker_Local.jar -l checklog.txt -c Test
clean:
rm -r classes; rm -r doc;
Manifest-Version: 1.0
Class-Path: lib/pdfbox-app-1.8.8.jar
X-COMMENT: Main-Class will be added automatically by build
Main-Class: fr.imag.Scidetect.scigenchecker_local.ScigenChecker_Local
......@@ -25,13 +25,23 @@ import java.io.PrintWriter;
import java.util.HashMap;
/**
*
* Classifier tags input files as belonging to a certain class. Examples of classes are SCIgen, Mathgen,...
* The decision is made according to the distance between the tested file and its nearest neighbor
* in each class. Thresholds for assignment are read from the file specified in the configuration file
* (default config.txt).
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.frs
*/
public class Classifier {
/**
* The key is the class name and the value is a pair of Doubles.
* This pair is composed of a threshold for quasi-certain classification and another for suspicion.
*/
HashMap<String, Double[]> Threshold = new HashMap<String, Double[]>();
/**
* Reads the thresholds from the configuration file (default config.txt).
* @throws FileNotFoundException
* @throws IOException
*/
public void readconfig() throws FileNotFoundException, IOException {
File conf = new File("config.txt");
BufferedReader br = new BufferedReader(new FileReader(conf));
......@@ -49,7 +59,13 @@ public class Classifier {
}
}
/**
* Classifies each document given the matrix of distances (distant).
* For each entry it gives the class (or classes) to which the text can be assigned.
* @param distant the matrix of distances
* @return the assigned class
* @throws IOException
*/
public String classify(HashMap<String, HashMap<String, Double>> distant) throws IOException {
String result = "";
......@@ -77,7 +93,11 @@ public class Classifier {
return conclusion;
}
/**
* Checks whether the distance is below, between, or above the two thresholds.
* @param result a string giving, for each class, the value of its nearest neighbor (NN)
* @return
*/
private String checkdistant(String result) {
String conclution = "";
String[] eachtype = result.split("\n");
......@@ -105,7 +125,10 @@ public class Classifier {
}
return conclution;
}
/**
* @param result
* @return
*/
private String findmindistant(String result) {
Double mindistant = 1.0;
String[] eachtype = result.split("\n");
......@@ -120,7 +143,11 @@ public class Classifier {
}
return conclu;
}
/**
*
* @param indexpath
* @return
*/
private String gettype(String indexpath) {
File indexfile = new File(indexpath);
String parent = indexfile.getParent();
......@@ -129,6 +156,10 @@ public class Classifier {
return parent;
}
/**
* @param distantto
* @return
*/
private String find_NN(HashMap<String, Double> distantto) {
HashMap<String, Double> distotype = new HashMap<String, Double>();
HashMap<String, String> NNname = new HashMap<String, String>();
......
......@@ -29,6 +29,11 @@ public class Indexer {
private Object content;
/**
* @param content
* @param textfile
* @throws FileNotFoundException
*/
public void index(String content, File textfile) throws FileNotFoundException {
String filename = textfile.getName().substring(0,textfile.getName().lastIndexOf("."));
filename+=".txt";
......
......@@ -53,6 +53,9 @@ public class Log {
}
}
/**
* @param conclusion
*/
public void savelog(String conclusion) {
File distantout ;
if (!loglocation.equals("logs/")) {
......@@ -70,8 +73,9 @@ public class Log {
out.close();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.out.println("***** Scidetect : Output file error \n");
System.out.println("***** Most probably the specified file is a Dir \n");
//e.printStackTrace();
}
}
}
......@@ -44,14 +44,21 @@ public class pdfextractor {
try {
pd = PDDocument.load(pdf.getPath());
wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(totxt)));
stripper.writeText(pd, wr);
try {stripper.writeText(pd, wr);}
catch (Exception e) {
System.out.println("* Something went wrong during:");
System.out.println(" - txt extraction from pdf:"+pdf);
System.out.println("* Continuing anyway...");
}
if (pd != null) {
pd.close();
}
// I use close() to flush the stream.
wr.close();
} catch (Exception e) {
// TODO: handle exception
System.out.println("* Something went wrong during:");
System.out.println(" - txt extraction from pdf:"+pdf);
System.out.println("* Continuing anyway...");
}
//this seems to be faster but it seems like the app server does not support pdftotext
//commandexecutor cm = new commandexecutor();
......
......@@ -50,6 +50,13 @@ public class ScigenChecker_Local {
private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
private Boolean savedetaillog = false;
/**
* Read in the config file:
*- places where to find samples of each class
*- default places where to write results.
* @throws FileNotFoundException
* @throws IOException
*/
private void readconfig() throws FileNotFoundException, IOException {
File conf = new File("config.txt");
BufferedReader br = new BufferedReader(new FileReader(conf));
......@@ -75,6 +82,9 @@ public class ScigenChecker_Local {
}
/**
* @throws IOException
*/
private void compute() throws IOException {
if (testpath != null) {
DateFormat dateFormat = new SimpleDateFormat("HH:mm dd.MM.yyyy");
......@@ -88,8 +98,12 @@ public class ScigenChecker_Local {
tests = reader.readtests(testpath);
} catch (Exception e) {
e.printStackTrace();
System.out.println("* Something went wrong during:");
System.out.println(" - reading the config file");
System.out.println(" - reading the samples (dir data)");
System.out.println(" - txt extraction from pdf");
System.out.println("* Continuing anyway...");
//e.printStackTrace();
}
DistantCalculator dc = new DistantCalculator();
......@@ -103,31 +117,59 @@ public class ScigenChecker_Local {
log.savedetaillog(distant);
}
} else {
System.out.println("can not read path to test folder");
System.out.println("***** Can not read path to the folder:"+testpath);
System.out.println("***** The folder should contains file to check");
}
}
/**
* Parsing of the command line arguments:
* where to find pdf files, where results should be written
* @param args
*/
public void readargs(String[] args) {
    // Without any argument there is nothing to check: show the usage and stop.
    if (args.length == 0) {
        printUsage();
        return;
    }
    // Single pass over the arguments. Every position is inspected (a value is not
    // skipped after being consumed), which matches the original scanning behavior.
    for (int i = 0; i < args.length; i += 1) {
        String option = args[i];
        if (option.equals("-l") || option.equals("-c")) {
            // Both options need a following value; guard against the option being
            // the last argument, which would otherwise throw
            // ArrayIndexOutOfBoundsException on args[i + 1].
            if (i + 1 >= args.length) {
                System.out.println("***** Missing value after option " + option + " \n");
                printUsage();
            } else if (option.equals("-l")) {
                // Where the results are written.
                Log.loglocation = args[i + 1];
            } else {
                // Directory containing the files to check.
                testpath = args[i + 1];
            }
        } else if (option.equals("-d")) {
            // Request the detailed log.
            savedetaillog = true;
        } else if (option.equals("-h")) {
            printUsage();
        }
    }
}
/**
* To print usage (-h)
*/
private static void printUsage() {
    // The jar name is spelled exactly as the build produces it
    // (ScigenChecker_Local.jar): on case-sensitive file systems a differently
    // cased name cannot be launched.
    System.out.println("***** Scigen & Co Checker \n");
    System.out.println("To test all files in a directory <pathToFilesDirToTest>:");
    System.out.println("java -jar ScigenChecker_Local.jar -l <pathToLogFile> -c <pathToFilesDirToTest> \n");
    // -d is accepted by readargs and was missing from the usage text.
    System.out.println("To also produce a detailed log, add -d:");
    System.out.println("java -jar ScigenChecker_Local.jar -l <pathToLogFile> -c <pathToFilesDirToTest> -d \n");
    System.out.println("To print usage:");
    System.out.println("java -jar ScigenChecker_Local.jar -h \n");
    System.out.println("***** \n");
}
/**
* This is the standalone checker. All pdf files in the directory specified after -c are
* checked against the classes found in the directory "data". Results are written to the log
* file specified by the -l option. If -d is given, a detailed log is produced.
* Example: testing all pdf files in the directory MyConf/PDF/ and writing the results
* to MyConf/checklog.txt:
* java -jar ScigenChecker_Local.jar -l MyConf/checklog.txt -c MyConf/PDF/
* @param args the command line arguments
*/
public static void main(String[] args) throws IOException {
ScigenChecker_Local a = new ScigenChecker_Local();
a.readconfig();
a.readargs(args);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment