Commit f8970f4d authored by Tien

Update file reader method

parent 46ba435d
......@@ -59,7 +59,7 @@ public class Classifier {
for (String key : distant.keySet()) {
//for each file in the test
result = find_NN(distant.get(key));
System.out.println(result);
//System.out.println(result);
//System.out.println(key);
//System.out.println(result);
String[] a = checkdistant(result).split("\n");
......
......@@ -109,6 +109,54 @@ public class Reader {
}
}
/**
 * Extracts the text of a single document, indexes it, and loads the resulting index file(s).
 *
 * <p>Names ending in {@code .pdf} are passed to {@code pdfextractor}; names ending in
 * {@code .xml} or {@code .xtx} are passed to {@code Xmlextractor}. For any other name the
 * content stays empty. Content shorter than {@code maxlength} is indexed in one piece;
 * otherwise it is split with {@code splitcontent}, every piece is written to a sibling file
 * named {@code <base>_part<i>.txt}, and each piece is indexed on its own.
 *
 * <p>The file name is expected to contain a {@code '.'}; without one the base-name
 * {@code substring} call throws {@link StringIndexOutOfBoundsException}.
 *
 * @param pdf the document to process (despite the parameter name, XML input is accepted too)
 * @throws IOException if a part file cannot be opened for writing; presumably also
 *         propagated from the extraction/indexing helpers (not visible here)
 */
private void readfile(File pdf) throws IOException {
    String name = pdf.getName();
    String content = "";
    if (name.endsWith(".pdf")) {
        pdfextractor a = new pdfextractor();
        content = a.pdfextract(pdf);
    } else if (name.endsWith(".xml") || name.endsWith(".xtx")) {
        Xmlextractor a = new Xmlextractor();
        content = a.xmlextract(pdf);
    }

    // File name without its last extension; shared by every derived file name below.
    String baseName = name.substring(0, name.lastIndexOf("."));

    if (content.length() < maxlength) {
        // Short document: index it as a whole.
        String indexname = "INDEX-" + baseName + ".txt";
        Indexer b = new Indexer();
        b.index(content, pdf);
        // NOTE(review): assumes Indexer writes INDEX-<base>.txt next to the input file - confirm.
        readindexfile(pdf.getParent() + "/" + indexname);
    } else {
        // Long document: split the content and index it part by part.
        String[] part = splitcontent(content);
        for (int i = 0; i < part.length; i++) {
            String filename = baseName + "_part" + i + ".txt";
            String indexname = "INDEX-" + filename;
            Indexer b = new Indexer();
            File a = new File(pdf.getParent() + "/" + filename);
            // try-with-resources closes the writer even if writing fails, so the
            // part file's handle is never leaked.
            try (PrintWriter out = new PrintWriter(new FileWriter(a))) {
                out.println(part[i]);
            }
            b.index(part[i], a);
            readindexfile(a.getParent() + "/" + indexname);
        }
    }
}
public HashMap<String, HashMap<String, Integer>> readtests(String testpath) throws IOException {
File folder = new File(testpath);
......@@ -120,98 +168,12 @@ public class Reader {
if (listOfFile[j].isDirectory()) {
readtests(listOfFile[j].getPath());
} else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) {
String content = "";
if (listOfFile[j].getName().endsWith(".pdf")) {
pdfextractor a = new pdfextractor();
content = a.pdfextract(listOfFile[j]);
} else if (listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) {
Xmlextractor a = new Xmlextractor();
content = a.xmlextract(listOfFile[j]);
}
//lets deal with long file over here
//split content and the index part by part
if (content.length() < maxlength) {
String indexname = "INDEX-"
+ listOfFile[j].getName().substring(0,
listOfFile[j].getName().lastIndexOf("."))
+ ".txt";
Indexer b = new Indexer();
b.index(content, listOfFile[j]);
readindexfile(listOfFile[j].getParent() + "/" + indexname);
} else {
String[] part = splitcontent(content);
for (int i = 0; i < part.length; i++) {
String indexname = "INDEX-"
+ listOfFile[j].getName().substring(0,
listOfFile[j].getName().lastIndexOf("."))
+ "_part" + i + ".txt";
String filename = listOfFile[j].getName().substring(0,
listOfFile[j].getName().lastIndexOf("."))
+ "_part" + i + ".txt";
Indexer b = new Indexer();
File a = new File(listOfFile[j].getParent() + "/" + filename);
PrintWriter out = new PrintWriter(new FileWriter(a));
out.println(part[i]);
//System.out.println(text);
out.close();
b.index(part[i], a);
readindexfile(a.getParent() + "/" + indexname);
}
}
readfile(listOfFile[j]);
}
}
} else if (folder.getName().endsWith(".pdf") || folder.getName().endsWith(".xml") || folder.getName().endsWith(".xtx")) {
String content = "";
if (folder.getName().endsWith(".pdf")) {
pdfextractor a = new pdfextractor();
content = a.pdfextract(folder);
} else if (folder.getName().endsWith(".xml") || folder.getName().endsWith(".xtx")) {
Xmlextractor a = new Xmlextractor();
content = a.xmlextract(folder);
}
//lets deal with long file over here
//split content and the index part by part
if (content.length() < maxlength) {
String indexname = "INDEX-"
+ folder.getName().substring(0,
folder.getName().lastIndexOf("."))
+ ".txt";
readfile(folder);
Indexer b = new Indexer();
b.index(content, folder);
readindexfile(folder.getParent() + "/" + indexname);
} else {
String[] part = splitcontent(content);
for (int i = 0; i < part.length; i++) {
String indexname = "INDEX-"
+ folder.getName().substring(0,
folder.getName().lastIndexOf("."))
+ "_part" + i + ".txt";
String filename = folder.getName().substring(0,
folder.getName().lastIndexOf("."))
+ "_part" + i + ".txt";
Indexer b = new Indexer();
File a = new File(folder.getParent() + "/" + filename);
PrintWriter out = new PrintWriter(new FileWriter(a));
out.println(part[i]);
//System.out.println(text);
out.close();
b.index(part[i], a);
readindexfile(a.getParent() + "/" + indexname);
}
}
}
return tests;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment