diff --git a/Makefile b/Makefile
index 33f7ca483433dc0215473fb0ffd3ac7c80dc7099..e4ad248660eea05e2d91e8a6001ccc46f4f5f917 100644
--- a/Makefile
+++ b/Makefile
@@ -15,10 +15,10 @@ all: classes doc jar
classes:
mkdir -p classes
- $(JAVAC) $(JAVAFLAGS) $(JAVACLASSPATH) src/fr/imag/Scidetect/*/*.java
+ $(JAVAC) $(JAVAFLAGS) $(JAVACLASSPATH) src/fr/imag/forge/Scidetect/*/*.java
doc:
- $(JAVADOC) src/*/*/*/*/*
+ $(JAVADOC) src/*/*/*/*/*/*.java # *.java only: the bare glob would also hand the *.java~ backups to javadoc
jar:
cd classes ; jar -cfvm ../ScigenChecker_Local`date +%Y-%m-%d`.jar ../MANIFEST.MF *; cd ..
diff --git a/src/fr/imag/Scidetect/Checker/Classifier.java b/src/fr/imag/forge/Scidetect/Checker/Classifier.java
similarity index 99%
rename from src/fr/imag/Scidetect/Checker/Classifier.java
rename to src/fr/imag/forge/Scidetect/Checker/Classifier.java
index 4daa7f62b2a3d4d93e5f456876f0776e6b2be185..563efac8ff6e396738774ea7bc6932aaf137c59d 100644
--- a/src/fr/imag/Scidetect/Checker/Classifier.java
+++ b/src/fr/imag/forge/Scidetect/Checker/Classifier.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.Checker;
+package fr.imag.forge.Scidetect.Checker;
import java.io.BufferedReader;
import java.io.File;
diff --git a/src/fr/imag/forge/Scidetect/Checker/Classifier.java~ b/src/fr/imag/forge/Scidetect/Checker/Classifier.java~
new file mode 100644
index 0000000000000000000000000000000000000000..35de5deb3acacc15a2290091d4d0a2039ed6693a
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/Checker/Classifier.java~
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.Checker;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashMap;
+
+/**
+ *
+ * @author tien
+ */
+public class Classifier {
+
+ HashMap<String, Double[]> Threshold = new HashMap<String, Double[]>(); // type name -> the two thresholds read from config.txt
+
+ /** Loads the tab-separated "Threshold_TYPE" rows of config.txt into Threshold. */
+ public void readconfig() throws FileNotFoundException, IOException {
+     BufferedReader br = new BufferedReader(new FileReader(new File("config.txt")));
+     try {
+         for (String line = br.readLine(); line != null; line = br.readLine()) {
+             if (line.startsWith("Threshold_")) {
+                 String[] b = line.split("\t");
+                 Double[] temp = new Double[2];
+                 temp[0] = Double.parseDouble(b[1]);
+                 temp[1] = Double.parseDouble(b[2]);
+                 Threshold.put(b[0].substring(10), temp); // 10 = length of "Threshold_"
+             }
+         }
+     } finally {
+         br.close(); // the reader used to be left open
+     }
+ }
+
+ /**
+  * Classifies every test file from its distances to the sample files.
+  * config.txt is re-read on each call (see readconfig).
+  *
+  * @param distant test file key -> (sample index path -> distance)
+  * @return one line per verdict: key, tab, verdict; a file for which
+  *         checkdistant yields nothing is reported as "cant classify"
+  * @throws IOException if config.txt cannot be read
+  */
+ public String classify(HashMap<String, HashMap<String, Double>> distant) throws IOException {
+     StringBuilder conclusion = new StringBuilder();
+     readconfig();
+     for (String key : distant.keySet()) {
+         // nearest neighbour of each sample type, then the threshold check
+         String result = find_NN(distant.get(key));
+         String[] a = checkdistant(result).split("\n");
+
+         if (a[0].length() == 0) {
+             conclusion.append(key).append("\t").append("cant classify\t1\tnull\n");
+         } else {
+             for (int i = 0; i < a.length; i++) {
+                 conclusion.append(key).append("\t").append(a[i]).append("\n");
+             }
+         }
+     }
+     return conclusion.toString();
+ }
+
+ /**
+  * Compares each per-type nearest-neighbour distance with that type's thresholds.
+  * @param result lines of "type, tab, distance, tab, neighbour path" (see find_NN)
+  * @return "is a"/"is suppected" lines; findmindistant(result) when none match; "" for an empty result
+  */
+ private String checkdistant(String result) {
+     if (result.length() == 0) {
+         return ""; // no neighbours at all: classify reports "cant classify" for an empty answer
+     }
+     StringBuilder conclution = new StringBuilder();
+     for (String line : result.split("\n")) {
+         String[] eachNN = line.split("\t");
+         // thresholds of this type, falling back to the "Default" entry
+         Double[] threshold = Threshold.get(Threshold.containsKey(eachNN[0]) ? eachNN[0] : "Default");
+         double d = Double.parseDouble(eachNN[1]);
+         if (d < threshold[0]) {
+             conclution.append("is a " + eachNN[0] + "\t" + eachNN[1] + "\t" + eachNN[2] + "\n");
+         } else if (d < threshold[1]) {
+             conclution.append("is suppected " + eachNN[0] + "\t" + eachNN[1] + "\t" + eachNN[2] + "\n");
+         }
+     }
+     // was `conclution == ""`, which compares references rather than contents
+     if (conclution.length() == 0) {
+         return findmindistant(result);
+     }
+     return conclution.toString();
+ }
+
+ /** Returns an "is Genuine" line for the smallest distance below 1.0, or "" when there is none. */
+ private String findmindistant(String result) {
+     double smallest = 1.0;
+     String verdict = "";
+     for (String row : result.split("\n")) {
+         String[] fields = row.split("\t");
+         double d = Double.parseDouble(fields[1]);
+         if (d < smallest) {
+             smallest = d;
+             verdict = "is Genuine \t" + fields[1] + "\t" + fields[2] + "\n";
+         }
+     }
+     return verdict;
+ }
+
+ /**
+  * Returns the name of the directory that directly contains {@code indexpath};
+  * find_NN uses that directory name as the sample type.
+  */
+ private String gettype(String indexpath) {
+     return new File(indexpath).getParentFile().getName(); // no hard-coded "/" separator
+ }
+
+ /**
+  * Finds, for every sample type, the sample closest to one test file.
+  *
+  * @param distantto sample index path -> distance to the test file
+  * @return one line per type: type, tab, smallest distance, tab, path of that sample
+  */
+ private String find_NN(HashMap<String, Double> distantto) {
+     HashMap<String, Double> distotype = new HashMap<String, Double>();
+     HashMap<String, String> NNname = new HashMap<String, String>();
+     for (String key : distantto.keySet()) {
+         String type = gettype(key);
+         Double d = distantto.get(key);
+         // keep the first sample seen for a type, then any strictly closer one
+         if (!distotype.containsKey(type) || d < distotype.get(type)) {
+             distotype.put(type, d);
+             NNname.put(type, key);
+         }
+     }
+     StringBuilder result = new StringBuilder();
+     for (String type : distotype.keySet()) {
+         result.append(type + "\t" + distotype.get(type) + "\t" + NNname.get(type) + "\n");
+     }
+     return result.toString();
+ }
+}
diff --git a/src/fr/imag/Scidetect/Checker/DistantCalculator.java b/src/fr/imag/forge/Scidetect/Checker/DistantCalculator.java
similarity index 98%
rename from src/fr/imag/Scidetect/Checker/DistantCalculator.java
rename to src/fr/imag/forge/Scidetect/Checker/DistantCalculator.java
index fab67b88f7dc0728d86526f7145f752125486923..c8f3fd3d78c49834a5cfbc244f053426f7cd75d0 100644
--- a/src/fr/imag/Scidetect/Checker/DistantCalculator.java
+++ b/src/fr/imag/forge/Scidetect/Checker/DistantCalculator.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.Checker;
+package fr.imag.forge.Scidetect.Checker;
import java.util.HashMap;
import java.util.HashSet;
diff --git a/src/fr/imag/forge/Scidetect/Checker/DistantCalculator.java~ b/src/fr/imag/forge/Scidetect/Checker/DistantCalculator.java~
new file mode 100644
index 0000000000000000000000000000000000000000..c6a33d9bfe1453404f71beeff97d63eaa023b3e0
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/Checker/DistantCalculator.java~
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.Checker;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ *
+ * @author tien
+ */
+public class DistantCalculator {
+
+ private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>(); // test key -> (sample key -> distance)
+
+ /**
+  * Computes the distance from every test text to every sample text and stores it in the distant field.
+  * @param samples sample key -> (token -> count)
+  * @param tests test key -> (token -> count)
+  * @return the distant field: test key -> (sample key -> distance)
+  */
+ public HashMap<String, HashMap<String, Double>> caldistant(HashMap<String, HashMap<String, Integer>> samples, HashMap<String, HashMap<String, Integer>> tests) {
+     for (String key : tests.keySet()) {
+         HashMap<String, Double> distantto = new HashMap<String, Double>();
+         for (String key2 : samples.keySet()) {
+             distantto.put(key2, cal_textdistant(tests.get(key), samples.get(key2)));
+         }
+         distant.put(key, distantto);
+     }
+     return distant;
+ }
+
+ /**
+  * Distance between two token-count tables.
+  *
+  * The counts of the longer text are scaled down to the total of the shorter
+  * one, the absolute differences are summed over every token of either text,
+  * and the sum is divided by twice the shorter total.
+  * The result is NaN when either text has a total of 0 (0.0 divided by 0);
+  * the arithmetic is unchanged from the earlier version.
+  *
+  * @param text1 token -> count of the first text
+  * @param text2 token -> count of the second text
+  * @return the distance described above
+  */
+ private double cal_textdistant(HashMap<String, Integer> text1,
+         HashMap<String, Integer> text2) {
+     Set<String> allkeys = new HashSet<String>();
+     allkeys.addAll(text1.keySet());
+     allkeys.addAll(text2.keySet());
+
+     // total number of tokens in each text
+     int Na = 0;
+     int Nb = 0;
+     for (String key : allkeys) {
+         Na += count(text1, key);
+         Nb += count(text2, key);
+     }
+
+     // reduce the proportion of the longer text before comparing
+     double sum = 0.0;
+     if (Na <= Nb) {
+         double ratio = Na / (double) Nb;
+         for (String key : allkeys) {
+             int Fa = count(text1, key);
+             int Fb = count(text2, key);
+             sum += Math.abs(Fa - (double) Fb * ratio);
+         }
+         return sum / (2 * Na);
+     } else {
+         double ratio = Nb / (double) Na;
+         for (String key : allkeys) {
+             int Fa = count(text1, key);
+             int Fb = count(text2, key);
+             sum += Math.abs(Fa * ratio - (double) Fb);
+         }
+         return sum / (2 * Nb);
+     }
+ }
+
+ /** Returns the count stored for {@code key}, or 0 when the token is absent. */
+ private static int count(HashMap<String, Integer> text, String key) {
+     Integer c = text.get(key);
+     return c == null ? 0 : c;
+ }
+}
diff --git a/src/fr/imag/Scidetect/Checker/Indexer.java b/src/fr/imag/forge/Scidetect/Checker/Indexer.java
similarity index 97%
rename from src/fr/imag/Scidetect/Checker/Indexer.java
rename to src/fr/imag/forge/Scidetect/Checker/Indexer.java
index 479fef8e53d7aef21be288ada91749f9907781bc..f386b08abbfb8741561b10c33e04f016b41f390d 100644
--- a/src/fr/imag/Scidetect/Checker/Indexer.java
+++ b/src/fr/imag/forge/Scidetect/Checker/Indexer.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.Checker;
+package fr.imag.forge.Scidetect.Checker;
import java.io.File;
import java.io.FileNotFoundException;
diff --git a/src/fr/imag/forge/Scidetect/Checker/Indexer.java~ b/src/fr/imag/forge/Scidetect/Checker/Indexer.java~
new file mode 100644
index 0000000000000000000000000000000000000000..23216fcff7e6248998983bd5987ef66fbae87a38
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/Checker/Indexer.java~
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.Checker;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintWriter;
+import java.util.HashMap;
+
+/**
+ *
+ * @author tien
+ */
+public class Indexer {
+
+ private Object content; // never read or written in this class; index() works on its own parameter of the same name
+
+ /**
+  * Counts the space-separated words of {@code content} and writes one
+  * "word, tab, count" line per word to "INDEX-" + base name + ".txt" next to {@code textfile}.
+  *
+  * @param content text to index
+  * @param textfile file the text came from; its name must contain a '.'
+  * @throws FileNotFoundException if the index file cannot be created
+  */
+ public void index(String content, File textfile) throws FileNotFoundException {
+     String name = textfile.getName();
+     String filename = name.substring(0, name.lastIndexOf(".")) + ".txt";
+     HashMap<String, Integer> counter = new HashMap<String, Integer>();
+     for (String word : content.split(" ")) {
+         Integer seen = counter.get(word);
+         counter.put(word, seen == null ? 1 : seen + 1);
+     }
+     PrintWriter out = new PrintWriter(new File(textfile.getParent() + "/INDEX-" + filename));
+     try {
+         for (String key : counter.keySet()) {
+             out.println(key + "\t" + counter.get(key));
+         }
+     } finally {
+         out.close(); // released even if writing fails part-way
+     }
+ }
+}
diff --git a/src/fr/imag/Scidetect/Checker/Reader.java b/src/fr/imag/forge/Scidetect/Checker/Reader.java
similarity index 97%
rename from src/fr/imag/Scidetect/Checker/Reader.java
rename to src/fr/imag/forge/Scidetect/Checker/Reader.java
index e187804de9e51c874ede492ea94735248d6a521f..5edf143bc27e1d05a374aef4bad88b0a8f1d3b50 100644
--- a/src/fr/imag/Scidetect/Checker/Reader.java
+++ b/src/fr/imag/forge/Scidetect/Checker/Reader.java
@@ -14,10 +14,10 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.Checker;
+package fr.imag.forge.Scidetect.Checker;
-import fr.imag.Scidetect.TextExtractor.Xmlextractor;
-import fr.imag.Scidetect.TextExtractor.pdfextractor;
+import fr.imag.forge.Scidetect.TextExtractor.Xmlextractor;
+import fr.imag.forge.Scidetect.TextExtractor.pdfextractor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
diff --git a/src/fr/imag/forge/Scidetect/Checker/Reader.java~ b/src/fr/imag/forge/Scidetect/Checker/Reader.java~
new file mode 100644
index 0000000000000000000000000000000000000000..879d14df1faeb7200c080b2be6b92d78ff1c250a
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/Checker/Reader.java~
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.Checker;
+
+import fr.imag.Scidetect.TextExtractor.Xmlextractor;
+import fr.imag.Scidetect.TextExtractor.pdfextractor;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ *
+ * @author tien
+ */
+public class Reader {
+
+ private HashMap<String, HashMap<String, Integer>> samples = new HashMap<String, HashMap<String, Integer>>(); // index path -> (token -> count)
+ private HashMap<String, HashMap<String, Integer>> tests = new HashMap<String, HashMap<String, Integer>>(); // index path -> (token -> count)
+ private String SamplesFolder; // value of the "samples" entry of config.txt
+ private int maxlength; // value of the "Max_length" entry of config.txt
+
+ /** Reads the "samples" and "Max_length" entries of the tab-separated config.txt; lines starting with '#' are skipped. */
+ public void readconfig() throws FileNotFoundException, IOException {
+     BufferedReader br = new BufferedReader(new FileReader(new File("config.txt")));
+     try {
+         for (String line = br.readLine(); line != null; line = br.readLine()) {
+             if (line.startsWith("#")) {
+                 continue;
+             }
+             String[] b = line.split("\t");
+             // other config entries should be read here as well
+             if (b[0].equals("samples")) {
+                 SamplesFolder = b[1];
+             } else if (b[0].equals("Max_length")) {
+                 maxlength = Integer.parseInt(b[1]);
+             }
+         }
+     } finally {
+         br.close(); // the reader used to be left open
+     }
+ }
+
+ /**
+  * Recursively loads the index of every .pdf under {@code foldername},
+  * creating the INDEX-*.txt file first when it does not exist yet.
+  * @param foldername directory to scan
+  * @return the samples field (readindexfile decides where each index is stored)
+  * @throws IOException if extraction, indexing or reading an index fails
+  */
+ public HashMap<String, HashMap<String, Integer>> readsamples(String foldername) throws IOException {
+     File[] listOfFile = new File(foldername).listFiles();
+     if (listOfFile == null) {
+         return samples; // listFiles() gives null for a path that cannot be listed
+     }
+     for (File f : listOfFile) {
+         if (f.isDirectory()) {
+             readsamples(f.getPath());
+         } else if (f.getName().endsWith(".pdf")) {
+             String indexname = "INDEX-" + f.getName().substring(0, f.getName().lastIndexOf(".")) + ".txt";
+             String indexpath = f.getParent() + "/" + indexname;
+             // exact test; the old substring search over the stringified listing also hit longer names
+             if (!new File(indexpath).exists()) {
+                 new Indexer().index(new pdfextractor().pdfextract(f), f);
+             }
+             readindexfile(indexpath);
+         }
+     }
+     return samples;
+ }
+
+ /**
+  * Parses a "token, tab, count" index file into samples (path contains SamplesFolder) or tests.
+  */
+ private void readindexfile(String path) throws IOException {
+     HashMap<String, Integer> a = new HashMap<String, Integer>();
+     BufferedReader br = new BufferedReader(new FileReader(new File(path)));
+     try {
+         for (String line = br.readLine(); line != null; line = br.readLine()) {
+             String[] b = line.split("\t");
+             a.put(b[0], Integer.parseInt(b[1]));
+         }
+     } finally {
+         br.close(); // also released when a malformed line throws
+     }
+     HashMap<String, HashMap<String, Integer>> target = path.contains(SamplesFolder) ? samples : tests;
+     target.put(path, a);
+ }
+
+ /**
+  * Extracts the text of one test document, indexes it and loads the index.
+  * Text of maxlength characters or more is cut into parts (splitcontent);
+  * each part is saved as NAME_partN.txt next to the document and indexed
+  * separately.
+  *
+  * @param pdf a .pdf, .xml or .xtx file; any other extension yields empty text
+  * @throws IOException if extraction, writing a part or reading an index fails
+  */
+ private void readfile(File pdf) throws IOException {
+     String name = pdf.getName();
+     String content = "";
+     if (name.endsWith(".pdf")) {
+         content = new pdfextractor().pdfextract(pdf);
+     } else if (name.endsWith(".xml") || name.endsWith(".xtx")) {
+         content = new Xmlextractor().xmlextract(pdf);
+     }
+
+     // base name without extension, shared by every file name built below
+     String base = name.substring(0, name.lastIndexOf("."));
+
+     if (content.length() < maxlength) {
+         // short enough: one index for the whole document
+         new Indexer().index(content, pdf);
+         readindexfile(pdf.getParent() + "/INDEX-" + base + ".txt");
+         return;
+     }
+
+     // long document: write, index and load it part by part
+     String[] part = splitcontent(content);
+     for (int i = 0; i < part.length; i++) {
+         if (part[i] == null) {
+             // defensive: a null slot cannot be written or indexed
+             continue;
+         }
+         String filename = base + "_part" + i + ".txt";
+         File a = new File(pdf.getParent() + "/" + filename);
+         PrintWriter out = new PrintWriter(new FileWriter(a));
+         try {
+             out.println(part[i]);
+         } finally {
+             out.close();
+         }
+         new Indexer().index(part[i], a);
+         readindexfile(a.getParent() + "/INDEX-" + filename);
+     }
+ }
+
+ /**
+  * Loads the index of one test document, or of every document found
+  * recursively under a directory. Only .pdf, .xml and .xtx files are read.
+  *
+  * @param testpath file or directory to read
+  * @return the tests field
+  * @throws IOException if a document cannot be processed
+  */
+ public HashMap<String, HashMap<String, Integer>> readtests(String testpath) throws IOException {
+     File folder = new File(testpath);
+     if (folder.isDirectory()) {
+         for (File f : folder.listFiles()) {
+             // the recursive call handles sub-directories and single files alike
+             readtests(f.getPath());
+         }
+     } else if (folder.getName().endsWith(".pdf") || folder.getName().endsWith(".xml") || folder.getName().endsWith(".xtx")) {
+         readfile(folder);
+     }
+     return tests;
+ }
+
+ /**
+  * Cuts {@code content} into consecutive pieces of maxlength characters;
+  * the last piece holds the remainder and may be shorter.
+  *
+  * The array is sized to the number of pieces actually produced: one extra
+  * slot used to stay null when the length was an exact multiple of maxlength.
+  *
+  * @param content text to split
+  * @return the pieces in order, none of them null
+  */
+ private String[] splitcontent(String content) {
+     int length = content.length();
+     // full pieces, plus one more only when a remainder exists
+     int nbofpart = length / maxlength + (length % maxlength == 0 ? 0 : 1);
+     String[] part = new String[nbofpart];
+
+     for (int i = 0; i < nbofpart; i++) {
+         int lower = i * maxlength;
+         part[i] = content.substring(lower, Math.min(lower + maxlength, length));
+     }
+
+     return part;
+ }
+
+}
diff --git a/src/fr/imag/Scidetect/Logger/Log.java b/src/fr/imag/forge/Scidetect/Logger/Log.java
similarity index 98%
rename from src/fr/imag/Scidetect/Logger/Log.java
rename to src/fr/imag/forge/Scidetect/Logger/Log.java
index cdbf868ca01c456c798a5224d67bad07fd8f9fb8..cc45a490b47fac17169d559dde1500b472199da9 100644
--- a/src/fr/imag/Scidetect/Logger/Log.java
+++ b/src/fr/imag/forge/Scidetect/Logger/Log.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.Logger;
+package fr.imag.forge.Scidetect.Logger;
import java.io.File;
import java.io.FileNotFoundException;
diff --git a/src/fr/imag/forge/Scidetect/Logger/Log.java~ b/src/fr/imag/forge/Scidetect/Logger/Log.java~
new file mode 100644
index 0000000000000000000000000000000000000000..6612c76563db173a025af6063ae3da7e4d6d2fbf
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/Logger/Log.java~
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.Logger;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintWriter;
+import java.util.HashMap;
+
+/**
+ *
+ * @author tien
+ */
+public class Log {
+
+ public static String loglocation; // "logs/" (then logtime + ".xls" is appended) or the complete log file path, see savelog
+ public static String detailloglocation; // prefix of the detail log path, see savedetaillog
+ // public static String testpath;
+ public static String logtime; // inserted before ".xls" in the generated log file names
+
+ /**
+  * Writes every "test, tab, sample, tab, distance" triple to the file
+  * detailloglocation + logtime + ".xls".
+  * @param distant test key -> (sample key -> distance)
+  */
+ public void savedetaillog(HashMap<String, HashMap<String, Double>> distant) {
+     File distantout = new File(detailloglocation + logtime + ".xls");
+     try {
+         PrintWriter out = new PrintWriter(distantout);
+         for (String key : distant.keySet()) {
+             HashMap<String, Double> row = distant.get(key);
+             for (String key2 : row.keySet()) {
+                 out.println(key + "\t" + key2 + "\t" + row.get(key2));
+             }
+         }
+         out.close();
+     } catch (FileNotFoundException e) {
+         e.printStackTrace(); // not rethrown: the caller sees no error
+     }
+ }
+
+ /**
+  * Writes {@code conclusion} to the log file: loglocation itself, or
+  * loglocation + logtime + ".xls" when loglocation is exactly "logs/".
+  * A FileNotFoundException is printed and not rethrown.
+  *
+  * @param conclusion text to write
+  */
+ public void savelog(String conclusion) {
+     String target = loglocation;
+     if (loglocation.equals("logs/")) {
+         target = loglocation + logtime + ".xls";
+     }
+
+     try {
+         PrintWriter out = new PrintWriter(new File(target));
+         out.write(conclusion);
+         out.close();
+     } catch (FileNotFoundException e) {
+         e.printStackTrace();
+     }
+ }
+}
diff --git a/src/fr/imag/Scidetect/TextExtractor/Xmlextractor.java b/src/fr/imag/forge/Scidetect/TextExtractor/Xmlextractor.java
similarity index 98%
rename from src/fr/imag/Scidetect/TextExtractor/Xmlextractor.java
rename to src/fr/imag/forge/Scidetect/TextExtractor/Xmlextractor.java
index b7c6bbab2665e29c30bb7414e36009e0e318c06e..6aa48584f449426ebbd18b64ac2afaab4db6c83e 100644
--- a/src/fr/imag/Scidetect/TextExtractor/Xmlextractor.java
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/Xmlextractor.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.TextExtractor;
+package fr.imag.forge.Scidetect.TextExtractor;
import static java.awt.SystemColor.text;
import java.io.File;
diff --git a/src/fr/imag/forge/Scidetect/TextExtractor/Xmlextractor.java~ b/src/fr/imag/forge/Scidetect/TextExtractor/Xmlextractor.java~
new file mode 100644
index 0000000000000000000000000000000000000000..ad1a1cdbbe1c4693f6440a3281ac60076563a86f
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/Xmlextractor.java~
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.TextExtractor;
+
+import static java.awt.SystemColor.text;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ *
+ * @author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ */
+public class Xmlextractor {
+
+ private String text = ""; // filled by printNote; nothing in this class resets it between xmlextract calls
+
+ /**
+  * Walks the node list recursively and appends the text content of every
+  * "Para" and "PDFTextExtract" element to {@code text}.
+  * NOTE(review): a matching element nested in another matching one is appended twice - confirm intended.
+  */
+ private void printNote(NodeList nodeList) {
+     for (int count = 0; count < nodeList.getLength(); count++) {
+         Node tempNode = nodeList.item(count);
+         if (tempNode.getNodeType() == Node.ELEMENT_NODE) {
+             String name = tempNode.getNodeName();
+             if ("Para".equals(name) || "PDFTextExtract".equals(name)) { // was ==, a reference comparison
+                 text += tempNode.getTextContent();
+             }
+             if (tempNode.hasChildNodes()) {
+                 printNote(tempNode.getChildNodes());
+             }
+         }
+     }
+ }
+
+ /**
+  * Collects the "Para"/"PDFTextExtract" text of an XML file (printNote), writes
+  * it to a .txt file with the same base path, and returns the normalized text.
+  * Parser failures are logged at SEVERE level; the method then carries on
+  * with whatever text has been collected.
+  * NOTE(review): the parser runs with default settings (DTDs/external entities enabled) - confirm inputs are trusted.
+  *
+  * @param xml file to read; its path must contain a '.'
+  * @return the result of normalizer.normalize on the written .txt file
+  * @throws IOException if the .txt file cannot be written or normalized
+  */
+ public String xmlextract(File xml) throws IOException {
+     try {
+         DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+         org.w3c.dom.Document doc = dBuilder.parse(xml);
+         if (doc.hasChildNodes()) {
+             printNote(doc.getChildNodes());
+         }
+     } catch (ParserConfigurationException ex) {
+         Logger.getLogger(Xmlextractor.class.getName()).log(Level.SEVERE, null, ex);
+     } catch (SAXException ex) {
+         Logger.getLogger(Xmlextractor.class.getName()).log(Level.SEVERE, null, ex);
+     } catch (IOException ex) {
+         Logger.getLogger(Xmlextractor.class.getName()).log(Level.SEVERE, null, ex);
+     }
+     String path = xml.getPath();
+     File totxt = new File(path.substring(0, path.lastIndexOf('.')) + ".txt");
+     PrintWriter out = new PrintWriter(new FileWriter(totxt));
+     out.println(text);
+     out.close();
+     return new normalizer().normalize(totxt);
+ }
+}
diff --git a/src/fr/imag/Scidetect/TextExtractor/commandexecutor.java b/src/fr/imag/forge/Scidetect/TextExtractor/commandexecutor.java
similarity index 97%
rename from src/fr/imag/Scidetect/TextExtractor/commandexecutor.java
rename to src/fr/imag/forge/Scidetect/TextExtractor/commandexecutor.java
index 7ca3c39ee18a4a553398d5d6b182bbc6f222de86..39e6a0c2c91a26bb3fb3e6a572498fa701e51dd2 100644
--- a/src/fr/imag/Scidetect/TextExtractor/commandexecutor.java
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/commandexecutor.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.TextExtractor;
+package fr.imag.forge.Scidetect.TextExtractor;
import java.io.BufferedReader;
import java.io.IOException;
diff --git a/src/fr/imag/forge/Scidetect/TextExtractor/commandexecutor.java~ b/src/fr/imag/forge/Scidetect/TextExtractor/commandexecutor.java~
new file mode 100644
index 0000000000000000000000000000000000000000..879b91fe29509957ea2a9fc151b6cbe7a473d86d
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/commandexecutor.java~
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.TextExtractor;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+/**
+ *
+ * @author tien
+ */
+public class commandexecutor {
+ /**
+  * Runs {@code command} through {@code /bin/sh -c} and reads its output until
+  * the stream closes; the output itself is discarded.
+  * stderr is merged into stdout: draining the two pipes one after the other
+  * could block forever once the pipe not being read filled up.
+  * @param command shell command line (interpreted by the shell, so never build it from untrusted text)
+  * @return always "done", also when starting or reading the process fails
+  */
+ public static String execute(String command) {
+     ProcessBuilder builder = new ProcessBuilder("/bin/sh", "-c", command);
+     builder.redirectErrorStream(true);
+     BufferedReader output = null;
+     try {
+         output = new BufferedReader(new InputStreamReader(builder.start().getInputStream()));
+         while (output.readLine() != null) {
+             // nothing to keep: reading only stops the child from stalling on a full pipe
+         }
+     } catch (IOException e) {
+         e.printStackTrace();
+     } finally {
+         if (output != null) {
+             try { output.close(); } catch (IOException ignored) { /* nothing left to release */ }
+         }
+     }
+     return "done";
+ }
+}
diff --git a/src/fr/imag/Scidetect/TextExtractor/normalizer.java b/src/fr/imag/forge/Scidetect/TextExtractor/normalizer.java
similarity index 97%
rename from src/fr/imag/Scidetect/TextExtractor/normalizer.java
rename to src/fr/imag/forge/Scidetect/TextExtractor/normalizer.java
index 56b459e0df0798ea4729c832cbacc5145ee056c5..2f54b08775ab2e5bba275069d0c92706ce8543e0 100644
--- a/src/fr/imag/Scidetect/TextExtractor/normalizer.java
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/normalizer.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.TextExtractor;
+package fr.imag.forge.Scidetect.TextExtractor;
import java.io.BufferedReader;
import java.io.File;
diff --git a/src/fr/imag/forge/Scidetect/TextExtractor/normalizer.java~ b/src/fr/imag/forge/Scidetect/TextExtractor/normalizer.java~
new file mode 100644
index 0000000000000000000000000000000000000000..a9ecfef27bfcc43f1fdd16d3a8514803512a2c32
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/normalizer.java~
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.TextExtractor;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+/**
+ *
+ * @author tien
+ */
+public class normalizer {
+    /**
+     * Rewrites {@code txt} in place as upper-case A-Z words: lines are joined with a space, '-'
+     * becomes a space, every other character is dropped and runs of spaces collapse to one.
+     * @param txt text file to read and then overwrite with its normalized form
+     * @return the normalized text (the file additionally gets a line terminator)
+     * @throws IOException if the file cannot be read or opened for writing
+     */
+    public String normalize(File txt) throws IOException {
+        StringBuilder joined = new StringBuilder(); // replaces quadratic String +=
+        BufferedReader br = new BufferedReader(new FileReader(txt));
+        try {
+            for (String line = br.readLine(); line != null; line = br.readLine()) {
+                joined.append(' ').append(line);
+            }
+        } finally {
+            br.close(); // also runs when readLine throws
+        }
+        // Locale.ROOT: a Turkish default locale upper-cases 'i' to U+0130, which would then be dropped.
+        String content = joined.toString().toUpperCase(java.util.Locale.ROOT)
+                .replaceAll("-", " ").replaceAll("[^A-Z ]", "").replaceAll("\\s+", " ");
+        PrintWriter out = new PrintWriter(txt);
+        out.println(content);
+        out.close();
+        return content;
+    }
+}
diff --git a/src/fr/imag/Scidetect/TextExtractor/pdfextractor.java b/src/fr/imag/forge/Scidetect/TextExtractor/pdfextractor.java
similarity index 97%
rename from src/fr/imag/Scidetect/TextExtractor/pdfextractor.java
rename to src/fr/imag/forge/Scidetect/TextExtractor/pdfextractor.java
index e06e766ce3874c1f2d32baed61b3a6b8003a509c..dc750af2adbb3cf8a06f711311322a1dcc99414c 100644
--- a/src/fr/imag/Scidetect/TextExtractor/pdfextractor.java
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/pdfextractor.java
@@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
-package fr.imag.Scidetect.TextExtractor;
+package fr.imag.forge.Scidetect.TextExtractor;
import java.io.BufferedWriter;
import java.io.File;
diff --git a/src/fr/imag/forge/Scidetect/TextExtractor/pdfextractor.java~ b/src/fr/imag/forge/Scidetect/TextExtractor/pdfextractor.java~
new file mode 100644
index 0000000000000000000000000000000000000000..cea9d9644bc9b8247819da454b5a0b8ffed47915
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/TextExtractor/pdfextractor.java~
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+package fr.imag.Scidetect.TextExtractor;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.text.Normalizer;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.util.PDFTextStripper;
+
+/**
+ *
+ * @author tien
+ */
+public class pdfextractor {
+
+    /**
+     * Writes the text of {@code pdf} to a sibling {@code .txt} file, then normalizes that file.
+     *
+     * @param pdf PDF file to convert
+     * @return the normalized text returned by {@code normalizer.normalize}
+     * @throws IOException if normalizing the text file fails, e.g. because extraction never created it
+     */
+    public String pdfextract(File pdf) throws IOException {
+        PDFTextStripper stripper = new PDFTextStripper();
+        String path = pdf.getPath();
+        System.out.println("converting: " + path);
+        // Only a dot inside the file name marks an extension; the original threw on dot-less paths.
+        int dot = path.lastIndexOf('.');
+        boolean hasExt = dot > path.lastIndexOf(File.separatorChar);
+        File totxt = new File((hasExt ? path.substring(0, dot) : path) + ".txt");
+        PDDocument pd = null;
+        BufferedWriter wr = null;
+        try {
+            pd = PDDocument.load(path);
+            wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(totxt)));
+            stripper.writeText(pd, wr);
+        } catch (Exception e) {
+            e.printStackTrace(); // still best effort, but no longer swallowed silently
+        } finally {
+            // Close on every path (the original leaked both on failure); close() also flushes wr.
+            if (pd != null) { try { pd.close(); } catch (IOException e) { e.printStackTrace(); } }
+            if (wr != null) { try { wr.close(); } catch (IOException e) { e.printStackTrace(); } }
+        }
+        // pdftotext via commandexecutor seemed faster but is not available on the app server.
+        return new normalizer().normalize(totxt);
+    }
+
+}
diff --git a/src/fr/imag/forge/Scidetect/scigenchecker_local/ScigenChecker_Local.java b/src/fr/imag/forge/Scidetect/scigenchecker_local/ScigenChecker_Local.java
new file mode 100644
index 0000000000000000000000000000000000000000..a115e12c65c1c1850cd3486bd07998b89960d12f
--- /dev/null
+++ b/src/fr/imag/forge/Scidetect/scigenchecker_local/ScigenChecker_Local.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package fr.imag.forge.Scidetect.scigenchecker_local;
+
+import fr.imag.forge.Scidetect.Checker.Classifier;
+import fr.imag.forge.Scidetect.Checker.DistantCalculator;
+import fr.imag.forge.Scidetect.Checker.Indexer;
+import fr.imag.forge.Scidetect.Checker.Reader;
+import fr.imag.forge.Scidetect.Logger.Log;
+import fr.imag.forge.Scidetect.TextExtractor.pdfextractor;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+
+/**
+ *
+ * @author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ */
+public class ScigenChecker_Local {
+    // NOTE(review): the map type arguments below look stripped in this copy; restore them from the original source.
+    private String testpath;
+    private HashMap> samples = new HashMap>();
+    private HashMap> tests = new HashMap>();
+    private String SamplesFolder;
+    private HashMap> distant = new HashMap>();
+    private Boolean savedetaillog = false;
+
+    /**
+     * Loads tab-separated key/value settings from {@code config.txt} in the working directory.
+     * Lines starting with '#' and lines without a value are skipped.
+     * @throws IOException if the file is missing or cannot be read
+     */
+    private void readconfig() throws FileNotFoundException, IOException {
+        BufferedReader br = new BufferedReader(new FileReader(new File("config.txt")));
+        try {
+            String line;
+            while ((line = br.readLine()) != null) {
+                String[] b = line.split("\t");
+                // The original indexed b[1] unconditionally and crashed on a key with no value.
+                if (!line.startsWith("#") && b.length >= 2) {
+                    if (b[0].equals("samples")) {
+                        SamplesFolder = b[1];
+                    } else if (b[0].equals("Default_log_folder")) {
+                        Log.loglocation = b[1];
+                    } else if (b[0].equals("Default_detail_log_folder")) {
+                        Log.detailloglocation = b[1];
+                    }
+                }
+            }
+        } finally {
+            br.close(); // the original leaked the reader
+        }
+    }
+
+    /**
+     * Reads samples and tests through {@code Reader}, hands them to {@code DistantCalculator} and
+     * {@code Classifier}, prints the conclusion and saves it via {@code Log}; only prints a hint if no test path is set.
+     * @throws IOException propagated from the calls below (which of them throws is not visible here)
+     */
+    private void compute() throws IOException {
+        if (testpath == null) {
+            System.out.println("can not read path to test folder");
+            return;
+        }
+        Log.logtime = new SimpleDateFormat("HH:mm dd.MM.yyyy").format(new Date());
+        try {
+            Reader reader = new Reader();
+            reader.readconfig();
+            samples = reader.readsamples(SamplesFolder);
+            tests = reader.readtests(testpath);
+        } catch (Exception e) {
+            // Best effort: the failure is reported and the run continues with whatever was loaded.
+            e.printStackTrace();
+        }
+        distant = new DistantCalculator().caldistant(samples, tests);
+        String conclusion = new Classifier().classify(distant);
+        System.out.println(conclusion);
+        Log log = new Log();
+        log.savelog(conclusion);
+        if (savedetaillog) {
+            log.savedetaillog(distant);
+        }
+    }
+
+    /**
+     * Applies command-line options: {@code -l folder} sets the log location, {@code -c path} the
+     * test path and {@code -d} turns on the detailed log.
+     * @param args raw command-line arguments; a trailing {@code -l} or {@code -c} without a value is ignored
+     */
+    public void readargs(String[] args) {
+        for (int i = 0; i < args.length; i += 1) {
+            boolean hasValue = i + 1 < args.length; // the original threw when a value flag came last
+            if (args[i].equals("-l") && hasValue) {
+                Log.loglocation = args[i + 1];
+            }
+            if (args[i].equals("-c") && hasValue) {
+                testpath = args[i + 1];
+            }
+            if (args[i].equals("-d")) {
+                savedetaillog = true;
+            }
+        }
+    }
+
+    /**
+     * Entry point: loads {@code config.txt}, lets the arguments override it, then runs the check.
+     * @param args the command line arguments
+     * @throws IOException if the configuration cannot be read or the check fails with an I/O error
+     */
+    public static void main(String[] args) throws IOException {
+        ScigenChecker_Local a = new ScigenChecker_Local();
+        a.readconfig();
+        a.readargs(args);
+        a.compute();
+    }
+}
diff --git a/src/fr/imag/Scidetect/scigenchecker_local/ScigenChecker_Local.java b/src/fr/imag/forge/Scidetect/scigenchecker_local/ScigenChecker_Local.java~
similarity index 98%
rename from src/fr/imag/Scidetect/scigenchecker_local/ScigenChecker_Local.java
rename to src/fr/imag/forge/Scidetect/scigenchecker_local/ScigenChecker_Local.java~
index fd944ba2427134a568555c620594e18aa549a483..9f1223e26132cf421f3a56d6aed0e1eb0ad3f32f 100644
--- a/src/fr/imag/Scidetect/scigenchecker_local/ScigenChecker_Local.java
+++ b/src/fr/imag/forge/Scidetect/scigenchecker_local/ScigenChecker_Local.java~
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
+ * author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,7 +37,7 @@ import java.util.HashMap;
/**
*
- * @author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
+ * @author tien
*/
public class ScigenChecker_Local {