Commit 05c78f85 authored by Tien
Browse files

removed some debug functions

parent 7d4318f2
...@@ -119,6 +119,7 @@ public class Reader { ...@@ -119,6 +119,7 @@ public class Reader {
readtests(listOfFile[j].getPath(), Samplecorpus, savedetaillog); readtests(listOfFile[j].getPath(), Samplecorpus, savedetaillog);
} else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) { } else if (listOfFile[j].getName().endsWith(".pdf") || listOfFile[j].getName().endsWith(".xml") || listOfFile[j].getName().endsWith(".xtx")) {
ArrayList<Text> text = new ArrayList<Text>(); ArrayList<Text> text = new ArrayList<Text>();
//System.out.println(listOfFile[j].getName());
TextProcessor textprocessor = new TextProcessor(); TextProcessor textprocessor = new TextProcessor();
text = textprocessor.newtext(listOfFile[j], listOfFile); text = textprocessor.newtext(listOfFile[j], listOfFile);
for (int i = 0; i < text.size(); i++) { for (int i = 0; i < text.size(); i++) {
......
...@@ -51,14 +51,14 @@ public class normalizer { ...@@ -51,14 +51,14 @@ public class normalizer {
} }
br.close(); br.close();
content = content.toUpperCase(); content = content.toUpperCase();
// content = content.replaceAll("-", " ");// parenthesis content = content.replaceAll("-", " ");// parenthesis
// content = content.replaceAll("[^A-Z ]", "");// non A to Z content = content.replaceAll("[^A-Z ]", "");// non A to Z
// content = content.replaceAll("\r", " "); // make a new line
// content = content.replaceAll("\n", " ");//prob not nessesary :D content = content.replaceAll("\n", " ");//prob not nessesary :D
// content = content.replaceAll("\\s+", " ");// remove extra spaces\ content = content.replaceAll("\\s+", " ");// remove extra spaces\
content = content.replaceAll("[-\r\n\\s+]", " ");// parenthesis //content = content.replaceAll("[-\r\n\\s+]", " ");// parenthesis
//content = content.replaceAll("\r", " "); // make a new line
content = content.replaceAll("[^A-Z ]", "");// remove non A to Z //content = content.replaceAll("[^A-Z ]", "");// remove non A to Z
PrintWriter out = new PrintWriter(txt); PrintWriter out = new PrintWriter(txt);
out.println(content); out.println(content);
out.close(); out.close();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment