diff --git a/scidetect/src/Checker/Classifier.java b/scidetect/src/Checker/Classifier.java index 671f72f03ed5065c12570b6615eef0f8cbedbd1e..4be10b9b9077a754a0e90f6e1fc30f870af386e7 100644 --- a/scidetect/src/Checker/Classifier.java +++ b/scidetect/src/Checker/Classifier.java @@ -59,7 +59,7 @@ public class Classifier { for (String key : distant.keySet()) { //for each file in the test result = find_NN(distant.get(key)); - System.out.println(result); + //System.out.println(result); //System.out.println(key); //System.out.println(result); String[] a = checkdistant(result).split("\n"); diff --git a/scidetect/src/Checker/Reader.java b/scidetect/src/Checker/Reader.java index 53e69fdcd19e40cf5f159993b08279870ecb12d5..649141129a1bdc4ab9cb3e4daccd3ba37ff4d14e 100644 --- a/scidetect/src/Checker/Reader.java +++ b/scidetect/src/Checker/Reader.java @@ -109,6 +109,54 @@ public class Reader { } } + private void readfile(File pdf) throws IOException { + + String content = ""; + if (pdf.getName().endsWith(".pdf")) { + + pdfextractor a = new pdfextractor(); + content = a.pdfextract(pdf); + } else if (pdf.getName().endsWith(".xml") || pdf.getName().endsWith(".xtx")) { + Xmlextractor a = new Xmlextractor(); + content = a.xmlextract(pdf); + } + + //lets deal with long file over here + //split content and the index part by part + if (content.length() < maxlength) { + String indexname = "INDEX-" + + pdf.getName().substring(0, + pdf.getName().lastIndexOf(".")) + + ".txt"; + + Indexer b = new Indexer(); + b.index(content, pdf); + + readindexfile(pdf.getParent() + "/" + indexname); + } else { + String[] part = splitcontent(content); + for (int i = 0; i < part.length; i++) { + String indexname = "INDEX-" + + pdf.getName().substring(0, + pdf.getName().lastIndexOf(".")) + + "_part" + i + ".txt"; + String filename = pdf.getName().substring(0, + pdf.getName().lastIndexOf(".")) + + "_part" + i + ".txt"; + Indexer b = new Indexer(); + File a = new File(pdf.getParent() + "/" + filename); + PrintWriter out = new PrintWriter(new FileWriter(a)); + out.println(part[i]); + //System.out.println(text); + out.close(); + b.index(part[i], a); + + readindexfile(a.getParent() + "/" + indexname); + + } + } + } + public HashMap> readtests(String testpath) throws IOException { File folder = new File(testpath); @@ -120,98 +168,12 @@ public class Reader { if (listOfFile[j].isDirectory()) { readtests(listOfFile[j].getPath()); } else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) { - String content = ""; - if (listOfFile[j].getName().endsWith(".pdf")) { - - pdfextractor a = new pdfextractor(); - content = a.pdfextract(listOfFile[j]); - } else if (listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) { - Xmlextractor a = new Xmlextractor(); - content = a.xmlextract(listOfFile[j]); - } - - //lets deal with long file over here - //split content and the index part by part - if (content.length() < maxlength) { - String indexname = "INDEX-" - + listOfFile[j].getName().substring(0, - listOfFile[j].getName().lastIndexOf(".")) - + ".txt"; - - Indexer b = new Indexer(); - b.index(content, listOfFile[j]); - - readindexfile(listOfFile[j].getParent() + "/" + indexname); - } else { - String[] part = splitcontent(content); - for (int i = 0; i < part.length; i++) { - String indexname = "INDEX-" - + listOfFile[j].getName().substring(0, - listOfFile[j].getName().lastIndexOf(".")) - + "_part" + i + ".txt"; - String filename = listOfFile[j].getName().substring(0, - listOfFile[j].getName().lastIndexOf(".")) - + "_part" + i + ".txt"; - Indexer b = new Indexer(); - File a = new File(listOfFile[j].getParent() + "/" + filename); - PrintWriter out = new PrintWriter(new FileWriter(a)); - out.println(part[i]); - //System.out.println(text); - out.close(); - b.index(part[i], a); - - readindexfile(a.getParent() + "/" + indexname); - - } - } - + readfile(listOfFile[j]); } } } else if (folder.getName().endsWith(".pdf") || folder.getName().endsWith(".xml") || folder.getName().endsWith(".xtx")) { - String content = ""; - if (folder.getName().endsWith(".pdf")) { - - pdfextractor a = new pdfextractor(); - content = a.pdfextract(folder); - } else if (folder.getName().endsWith(".xml") || folder.getName().endsWith(".xtx")) { - Xmlextractor a = new Xmlextractor(); - content = a.xmlextract(folder); - } - - //lets deal with long file over here - //split content and the index part by part - if (content.length() < maxlength) { - String indexname = "INDEX-" - + folder.getName().substring(0, - folder.getName().lastIndexOf(".")) - + ".txt"; + readfile(folder); - Indexer b = new Indexer(); - b.index(content, folder); - - readindexfile(folder.getParent() + "/" + indexname); - } else { - String[] part = splitcontent(content); - for (int i = 0; i < part.length; i++) { - String indexname = "INDEX-" - + folder.getName().substring(0, - folder.getName().lastIndexOf(".")) - + "_part" + i + ".txt"; - String filename = folder.getName().substring(0, - folder.getName().lastIndexOf(".")) - + "_part" + i + ".txt"; - Indexer b = new Indexer(); - File a = new File(folder.getParent() + "/" + filename); - PrintWriter out = new PrintWriter(new FileWriter(a)); - out.println(part[i]); - //System.out.println(text); - out.close(); - b.index(part[i], a); - - readindexfile(a.getParent() + "/" + indexname); - - } - } } return tests; }