Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Cyril Labbe
scidetect
Commits
f8970f4d
Commit
f8970f4d
authored
Feb 23, 2015
by
Tien
Browse files
Update file reader method
parent
46ba435d
Changes
2
Hide whitespace changes
Inline
Side-by-side
scidetect/src/Checker/Classifier.java
View file @
f8970f4d
...
...
@@ -59,7 +59,7 @@ public class Classifier {
for
(
String
key
:
distant
.
keySet
())
{
//for each file in the test
result
=
find_NN
(
distant
.
get
(
key
));
System
.
out
.
println
(
result
);
//
System.out.println(result);
//System.out.println(key);
//System.out.println(result);
String
[]
a
=
checkdistant
(
result
).
split
(
"\n"
);
...
...
scidetect/src/Checker/Reader.java
View file @
f8970f4d
...
...
@@ -109,6 +109,54 @@ public class Reader {
}
}
/**
 * Extracts the text of one document, indexes it, and loads the resulting index file(s).
 *
 * <p>Names ending in {@code .pdf} are extracted with {@code pdfextractor}; names ending in
 * {@code .xml} or {@code .xtx} are extracted with {@code Xmlextractor}; any other name
 * leaves the content empty. Content shorter than {@code maxlength} is indexed in one go.
 * Longer content is split with {@code splitcontent}; each part is written to
 * {@code <base>_part<i>.txt} next to the source file, indexed, and its index file read.
 *
 * @param pdf the document to process (despite the name, it may also be an XML/XTX file)
 * @throws IOException if a part file cannot be opened for writing; the extraction,
 *     indexing and index-reading helpers presumably propagate I/O failures as well
 */
private void readfile(File pdf) throws IOException {
    String name = pdf.getName();

    String content = "";
    if (name.endsWith(".pdf")) {
        pdfextractor extractor = new pdfextractor();
        content = extractor.pdfextract(pdf);
    } else if (name.endsWith(".xml") || name.endsWith(".xtx")) {
        Xmlextractor extractor = new Xmlextractor();
        content = extractor.xmlextract(pdf);
    }

    boolean fitsInOneIndex = content.length() < maxlength;

    // File name without its extension; shared by every index/part name built below.
    // NOTE(review): a name without any '.' makes lastIndexOf return -1 and substring throw;
    // the visible callers only pass .pdf/.xml/.xtx files, so this is left as is.
    String baseName = name.substring(0, name.lastIndexOf("."));

    if (fitsInOneIndex) {
        String indexname = "INDEX-" + baseName + ".txt";
        Indexer indexer = new Indexer();
        indexer.index(content, pdf);
        // NOTE(review): File.getParent() returns null for a bare relative file name, which
        // would produce a path starting with "null/" - confirm callers always pass a path
        // that has a parent directory.
        readindexfile(pdf.getParent() + "/" + indexname);
    } else {
        // Long document: split the content and index it part by part.
        String[] part = splitcontent(content);
        for (int i = 0; i < part.length; i++) {
            String partName = baseName + "_part" + i + ".txt";
            String indexname = "INDEX-" + partName;

            Indexer indexer = new Indexer();
            File partFile = new File(pdf.getParent() + "/" + partName);

            // Persist the part next to the source document; the finally block guarantees
            // the writer is released even if writing fails unexpectedly.
            PrintWriter out = new PrintWriter(new FileWriter(partFile));
            try {
                out.println(part[i]);
            } finally {
                out.close();
            }

            indexer.index(part[i], partFile);
            readindexfile(partFile.getParent() + "/" + indexname);
        }
    }
}
public
HashMap
<
String
,
HashMap
<
String
,
Integer
>>
readtests
(
String
testpath
)
throws
IOException
{
File
folder
=
new
File
(
testpath
);
...
...
@@ -120,98 +168,12 @@ public class Reader {
if
(
listOfFile
[
j
].
isDirectory
())
{
readtests
(
listOfFile
[
j
].
getPath
());
}
else
if
(
listOfFile
[
j
].
getName
().
endsWith
(
".pdf"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xml"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xtx"
))
{
String
content
=
""
;
if
(
listOfFile
[
j
].
getName
().
endsWith
(
".pdf"
))
{
pdfextractor
a
=
new
pdfextractor
();
content
=
a
.
pdfextract
(
listOfFile
[
j
]);
}
else
if
(
listOfFile
[
j
].
getName
().
endsWith
(
".xml"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xtx"
))
{
Xmlextractor
a
=
new
Xmlextractor
();
content
=
a
.
xmlextract
(
listOfFile
[
j
]);
}
//lets deal with long file over here
//split content and the index part by part
if
(
content
.
length
()
<
maxlength
)
{
String
indexname
=
"INDEX-"
+
listOfFile
[
j
].
getName
().
substring
(
0
,
listOfFile
[
j
].
getName
().
lastIndexOf
(
"."
))
+
".txt"
;
Indexer
b
=
new
Indexer
();
b
.
index
(
content
,
listOfFile
[
j
]);
readindexfile
(
listOfFile
[
j
].
getParent
()
+
"/"
+
indexname
);
}
else
{
String
[]
part
=
splitcontent
(
content
);
for
(
int
i
=
0
;
i
<
part
.
length
;
i
++)
{
String
indexname
=
"INDEX-"
+
listOfFile
[
j
].
getName
().
substring
(
0
,
listOfFile
[
j
].
getName
().
lastIndexOf
(
"."
))
+
"_part"
+
i
+
".txt"
;
String
filename
=
listOfFile
[
j
].
getName
().
substring
(
0
,
listOfFile
[
j
].
getName
().
lastIndexOf
(
"."
))
+
"_part"
+
i
+
".txt"
;
Indexer
b
=
new
Indexer
();
File
a
=
new
File
(
listOfFile
[
j
].
getParent
()
+
"/"
+
filename
);
PrintWriter
out
=
new
PrintWriter
(
new
FileWriter
(
a
));
out
.
println
(
part
[
i
]);
//System.out.println(text);
out
.
close
();
b
.
index
(
part
[
i
],
a
);
readindexfile
(
a
.
getParent
()
+
"/"
+
indexname
);
}
}
readfile
(
listOfFile
[
j
]);
}
}
}
else
if
(
folder
.
getName
().
endsWith
(
".pdf"
)
||
folder
.
getName
().
endsWith
(
".xml"
)
||
folder
.
getName
().
endsWith
(
".xtx"
))
{
String
content
=
""
;
if
(
folder
.
getName
().
endsWith
(
".pdf"
))
{
pdfextractor
a
=
new
pdfextractor
();
content
=
a
.
pdfextract
(
folder
);
}
else
if
(
folder
.
getName
().
endsWith
(
".xml"
)
||
folder
.
getName
().
endsWith
(
".xtx"
))
{
Xmlextractor
a
=
new
Xmlextractor
();
content
=
a
.
xmlextract
(
folder
);
}
//lets deal with long file over here
//split content and the index part by part
if
(
content
.
length
()
<
maxlength
)
{
String
indexname
=
"INDEX-"
+
folder
.
getName
().
substring
(
0
,
folder
.
getName
().
lastIndexOf
(
"."
))
+
".txt"
;
readfile
(
folder
);
Indexer
b
=
new
Indexer
();
b
.
index
(
content
,
folder
);
readindexfile
(
folder
.
getParent
()
+
"/"
+
indexname
);
}
else
{
String
[]
part
=
splitcontent
(
content
);
for
(
int
i
=
0
;
i
<
part
.
length
;
i
++)
{
String
indexname
=
"INDEX-"
+
folder
.
getName
().
substring
(
0
,
folder
.
getName
().
lastIndexOf
(
"."
))
+
"_part"
+
i
+
".txt"
;
String
filename
=
folder
.
getName
().
substring
(
0
,
folder
.
getName
().
lastIndexOf
(
"."
))
+
"_part"
+
i
+
".txt"
;
Indexer
b
=
new
Indexer
();
File
a
=
new
File
(
folder
.
getParent
()
+
"/"
+
filename
);
PrintWriter
out
=
new
PrintWriter
(
new
FileWriter
(
a
));
out
.
println
(
part
[
i
]);
//System.out.println(text);
out
.
close
();
b
.
index
(
part
[
i
],
a
);
readindexfile
(
a
.
getParent
()
+
"/"
+
indexname
);
}
}
}
return
tests
;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment