Commit 05c78f85 authored by Tien

removed some debug functions

parent 7d4318f2
......@@ -119,6 +119,7 @@ public class Reader {
readtests(listOfFile[j].getPath(), Samplecorpus, savedetaillog);
} else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) {
ArrayList<Text> text = new ArrayList<Text>();
//System.out.println(listOfFile[j].getName());
TextProcessor textprocessor = new TextProcessor();
text = textprocessor.newtext(listOfFile[j], listOfFile);
for (int i = 0; i < text.size(); i++) {
......
......@@ -51,14 +51,14 @@ public class normalizer {
}
br.close();
content = content.toUpperCase();
// content = content.replaceAll("-", " ");// parenthesis
// content = content.replaceAll("[^A-Z ]", "");// non A to Z
//
// content = content.replaceAll("\n", " ");//prob not nessesary :D
// content = content.replaceAll("\\s+", " ");// remove extra spaces\
content = content.replaceAll("[-\r\n\\s+]", " ");// parenthesis
//content = content.replaceAll("\r", " "); // make a new line
content = content.replaceAll("[^A-Z ]", "");// remove non A to Z
content = content.replaceAll("-", " ");// parenthesis
content = content.replaceAll("[^A-Z ]", "");// non A to Z
content = content.replaceAll("\r", " "); // make a new line
content = content.replaceAll("\n", " ");//prob not nessesary :D
content = content.replaceAll("\\s+", " ");// remove extra spaces\
//content = content.replaceAll("[-\r\n\\s+]", " ");// parenthesis
//content = content.replaceAll("[^A-Z ]", "");// remove non A to Z
PrintWriter out = new PrintWriter(txt);
out.println(content);
out.close();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment