Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Cyril Labbe
scidetect
Commits
d000b9e9
Commit
d000b9e9
authored
Jun 23, 2015
by
Tien
Browse files
fixed clean function
parent
f3296cb9
Changes
11
Hide whitespace changes
Inline
Side-by-side
detaillogs/detaillog
deleted
100644 → 0
View file @
f3296cb9
this is the detail log folder
logs/logs
deleted
100644 → 0
View file @
f3296cb9
this is the logs folder
logs/logs~
deleted
100644 → 0
View file @
f3296cb9
nbproject/private/private.properties
View file @
d000b9e9
application.args
=
-c /home/tien/Test_demo
-d
application.args
=
-c /home/tien/Test_demo
/xml/2978-3-319-20608-0_Book_PrintPDF.pdf -d -noclean
compile.on.save
=
true
do.depend
=
false
do.jar
=
true
...
...
nbproject/project.properties
View file @
d000b9e9
...
...
@@ -68,7 +68,7 @@ jnlp.signed=false
jnlp.signing
=
jnlp.signing.alias
=
jnlp.signing.keystore
=
main.class
=
fr.imag.forge.scidetect.SciDetect_local.Sci
d
etect_Local
main.class
=
fr.imag.forge.scidetect.SciDetect_local.Sci
D
etect_Local
# Optional override of default Codebase manifest attribute, use to prevent RIAs from being repurposed
manifest.custom.codebase
=
# Optional override of default Permissions manifest attribute (supported values: sandbox, all-permissions)
...
...
src/fr/imag/forge/scidetect/Checker/Classifier.java
View file @
d000b9e9
...
...
@@ -105,9 +105,9 @@ public class Classifier {
}
// check the distance against the thresholds
if
(
Double
.
parseDouble
(
eachNN
[
1
])
<
threshold
[
0
])
{
conclusion
+=
"is a "
+
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
conclusion
+=
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
}
else
if
(
Double
.
parseDouble
(
eachNN
[
1
])
<
threshold
[
1
])
{
conclusion
+=
"
is s
uppected "
+
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
conclusion
+=
"
S
uppected "
+
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
}
}
...
...
@@ -132,7 +132,7 @@ public class Classifier {
String
[]
eachNN
=
eachtype
[
i
].
split
(
"\t"
);
if
(
Double
.
parseDouble
(
eachNN
[
1
])
<
mindistant
)
{
mindistant
=
Double
.
parseDouble
(
eachNN
[
1
]);
conclu
=
"
is
Genuine \t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
conclu
=
"Genuine \t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
}
}
return
conclu
;
...
...
src/fr/imag/forge/scidetect/Checker/Cleaner.java
View file @
d000b9e9
...
...
@@ -40,7 +40,7 @@ public class Cleaner {
String
originalname
=
listOfFile
[
j
].
getName
();
String
originalnamewoextension
=
listOfFile
[
j
].
getName
().
substring
(
0
,
listOfFile
[
j
].
getName
().
lastIndexOf
(
"."
));
for
(
int
i
=
0
;
i
<
listOfFile
.
length
;
i
++)
{
if
(
listOfFile
[
i
].
getName
().
contains
(
originalnamewoextension
)
&&
!
listOfFile
[
i
].
getName
().
contains
(
originalname
))
{
if
(
listOfFile
[
i
].
getName
().
contains
(
originalnamewoextension
)
&&
!
listOfFile
[
i
].
getName
().
contains
(
originalname
)
&&!
listOfFile
[
i
].
getName
().
contains
(
".pdf.xtx"
)
)
{
listOfFile
[
i
].
delete
();
}
}
...
...
@@ -52,7 +52,7 @@ public class Cleaner {
File
[]
listOfFile
=
folder
.
getParentFile
().
listFiles
();
String
originalnamewoextension
=
folder
.
getName
().
substring
(
0
,
folder
.
getName
().
lastIndexOf
(
"."
));
for
(
int
i
=
0
;
i
<
listOfFile
.
length
;
i
++)
{
if
(
listOfFile
[
i
].
getName
().
contains
(
originalnamewoextension
)
&&
!
listOfFile
[
i
].
getName
().
contains
(
originalname
))
{
if
(
listOfFile
[
i
].
getName
().
contains
(
originalnamewoextension
)
&&
!
listOfFile
[
i
].
getName
().
contains
(
originalname
)
&&!
listOfFile
[
i
].
getName
().
contains
(
".pdf.xtx"
)
)
{
listOfFile
[
i
].
delete
();
}
}
...
...
src/fr/imag/forge/scidetect/Checker/Reader.java
View file @
d000b9e9
...
...
@@ -40,10 +40,10 @@ public class Reader {
private
Corpus
samples
=
new
Corpus
();
private
Corpus
test
=
new
Corpus
();
private
String
SamplesFolder
;
/**
*Read config file
* Read config file
*
* @throws FileNotFoundException
* @throws IOException
*/
...
...
@@ -64,7 +64,7 @@ public class Reader {
//maxlength = Integer.parseInt(b[1]);
TextProcessor
.
maxlength
=
Integer
.
parseInt
(
b
[
1
]);
}
if
(
b
[
0
].
equals
(
"Min_length"
))
{
if
(
b
[
0
].
equals
(
"Min_length"
))
{
//maxlength = Integer.parseInt(b[1]);
TextProcessor
.
minlength
=
Integer
.
parseInt
(
b
[
1
]);
}
...
...
@@ -74,7 +74,8 @@ public class Reader {
}
/**
*Read the sample folder
* Read the sample folder
*
* @param foldername
* @return sample corpus
* @throws IOException
...
...
@@ -85,7 +86,7 @@ public class Reader {
for
(
int
j
=
0
;
j
<
listOfFile
.
length
;
j
++)
{
if
(
listOfFile
[
j
].
isDirectory
())
{
readsamples
(
listOfFile
[
j
].
getPath
());
}
else
if
(
listOfFile
[
j
].
getName
().
endsWith
(
".pdf"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xml"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xtx"
)||(
listOfFile
[
j
].
getName
().
endsWith
(
".txt"
)
&&
!
listOfFile
[
j
].
getName
().
startsWith
(
"INDEX-"
)))
{
}
else
if
(
listOfFile
[
j
].
getName
().
endsWith
(
".pdf"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xml"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xtx"
)
||
(
listOfFile
[
j
].
getName
().
endsWith
(
".txt"
)
&&
!
listOfFile
[
j
].
getName
().
startsWith
(
"INDEX-"
)))
{
ArrayList
<
Text
>
text
=
new
ArrayList
<
Text
>();
TextProcessor
textprocessor
=
new
TextProcessor
();
text
=
textprocessor
.
newtext
(
listOfFile
[
j
],
listOfFile
);
...
...
@@ -100,26 +101,40 @@ public class Reader {
}
/**
*Read the test folder
* Read the test folder
*
* @param foldername
* @return test corpus
* @throws IOException
*/
public
Corpus
readtests
(
String
foldername
)
throws
IOException
{
File
folder
=
new
File
(
foldername
);
File
[]
listOfFile
=
folder
.
listFiles
();
for
(
int
j
=
0
;
j
<
listOfFile
.
length
;
j
++)
{
if
(
listOfFile
[
j
].
isDirectory
())
{
readtests
(
listOfFile
[
j
].
getPath
());
}
else
if
(
listOfFile
[
j
].
getName
().
endsWith
(
".pdf"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xml"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xtx"
))
{
ArrayList
<
Text
>
text
=
new
ArrayList
<
Text
>();
TextProcessor
textprocessor
=
new
TextProcessor
();
text
=
textprocessor
.
newtext
(
listOfFile
[
j
],
listOfFile
);
for
(
int
i
=
0
;
i
<
text
.
size
();
i
++)
{
test
.
put
(
text
.
get
(
i
));
if
(
folder
.
isDirectory
())
{
File
[]
listOfFile
=
folder
.
listFiles
();
for
(
int
j
=
0
;
j
<
listOfFile
.
length
;
j
++)
{
if
(
listOfFile
[
j
].
isDirectory
())
{
readtests
(
listOfFile
[
j
].
getPath
());
}
else
if
(
listOfFile
[
j
].
getName
().
endsWith
(
".pdf"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xml"
)
||
listOfFile
[
j
].
getName
().
endsWith
(
".xtx"
))
{
ArrayList
<
Text
>
text
=
new
ArrayList
<
Text
>();
TextProcessor
textprocessor
=
new
TextProcessor
();
text
=
textprocessor
.
newtext
(
listOfFile
[
j
],
listOfFile
);
for
(
int
i
=
0
;
i
<
text
.
size
();
i
++)
{
test
.
put
(
text
.
get
(
i
));
}
}
}
}
}
else
if
(
folder
.
getName
().
endsWith
(
".pdf"
)
||
folder
.
getName
().
endsWith
(
".xml"
)
||
folder
.
getName
().
endsWith
(
".xtx"
))
{
ArrayList
<
Text
>
text
=
new
ArrayList
<
Text
>();
TextProcessor
textprocessor
=
new
TextProcessor
();
File
[]
listOfFile
=
folder
.
getParentFile
().
listFiles
();
//listOfFile[0] = folder;
text
=
textprocessor
.
newtext
(
folder
,
listOfFile
);
for
(
int
i
=
0
;
i
<
text
.
size
();
i
++)
{
test
.
put
(
text
.
get
(
i
));
}
}
return
test
;
...
...
src/fr/imag/forge/scidetect/Corpus/TextProcessor.java
View file @
d000b9e9
...
...
@@ -63,6 +63,7 @@ public class TextProcessor {
original
.
getName
().
lastIndexOf
(
"."
))
+
".txt"
;
String
content
=
""
;
if
(
Arrays
.
asList
(
listOfFile
).
toString
().
contains
(
indexname
))
{
// System.out.println("lets read from index file");
readindexfile
(
original
.
getParent
()
+
"/"
+
indexname
);
...
...
src/fr/imag/forge/scidetect/Logger/Log.java
View file @
d000b9e9
...
...
@@ -53,7 +53,7 @@ public class Log {
if
(!
dloglocation
.
exists
())
{
dloglocation
.
mkdir
();
}
File
distantout
=
new
File
(
detailloglocation
+
logtime
+
".
xls
"
);
File
distantout
=
new
File
(
detailloglocation
+
logtime
+
".
tsv
"
);
//File distantout = new File(testpath+"/alldistant.xls");
PrintWriter
out
;
try
{
...
...
@@ -86,7 +86,7 @@ public class Log {
if
(!
location
.
exists
())
{
location
.
mkdir
();
}
distantout
=
new
File
(
loglocation
+
logtime
+
".
xls
"
);
distantout
=
new
File
(
loglocation
+
logtime
+
".
tsv
"
);
}
PrintWriter
out
;
...
...
src/fr/imag/forge/scidetect/SciDetect_local/SciDetect_Local.java
View file @
d000b9e9
...
...
@@ -17,6 +17,7 @@
package
fr.imag.forge.scidetect.SciDetect_local
;
import
fr.imag.forge.scidetect.Checker.Classifier
;
import
fr.imag.forge.scidetect.Checker.Cleaner
;
import
fr.imag.forge.scidetect.Checker.DistantCalculator
;
import
fr.imag.forge.scidetect.Checker.Reader
;
import
fr.imag.forge.scidetect.Checker.Utils.DistancesSet
;
...
...
@@ -39,7 +40,7 @@ import java.util.Date;
public
class
SciDetect_Local
{
// private String loglocation;
// private String detailloglocation;
// private String detailloglocation;
private
String
testpath
;
//private String logtime;
private
Corpus
samples
=
new
Corpus
();
...
...
@@ -48,11 +49,12 @@ public class SciDetect_Local {
//private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
DistancesSet
distant
=
new
DistancesSet
();
private
Boolean
savedetaillog
=
false
;
private
Boolean
clean
=
true
;
/**
* Read in the config file:
*
-
places where to
find samples of each class
*
- default places where to write results.
* Read in the config file:
- places where to find samples of each class -
*
default
places where to
write results.
*
* @throws FileNotFoundException
* @throws IOException
*/
...
...
@@ -80,16 +82,18 @@ public class SciDetect_Local {
}
}
/**
* This should be where all the components be called,
* It can be used as an interface for a stand alone SciDetect API library.
* This is where all the components are called. It can be used as an
* interface for a stand alone SciDetect API library.
*
* @throws IOException
*/
public
void
compute
(
String
[]
args
)
throws
IOException
{
readconfig
();
readargs
(
args
);
if
(
testpath
!=
null
)
{
DateFormat
dateFormat
=
new
SimpleDateFormat
(
"
HH:mm dd.MM.yyyy
"
);
DateFormat
dateFormat
=
new
SimpleDateFormat
(
"
yyyy.MM.dd-HH:mm
"
);
Date
date
=
new
Date
();
Log
.
logtime
=
dateFormat
.
format
(
date
);
try
{
...
...
@@ -115,18 +119,22 @@ public class SciDetect_Local {
System
.
out
.
println
(
conclusion
);
Log
log
=
new
Log
();
log
.
savelog
(
conclusion
);
if
(
clean
){
Cleaner
cleaner
=
new
Cleaner
();
cleaner
.
clean
(
testpath
);}
if
(
savedetaillog
)
{
log
.
savedetaillog
(
distant
);
}
}
else
{
System
.
out
.
println
(
"***** Can not read path to the folder:"
+
testpath
);
System
.
out
.
println
(
"***** Can not read path to the folder:"
+
testpath
);
System
.
out
.
println
(
"***** The folder should contains file to check"
);
}
}
/**
* Parsing of the command line arguments:
* where to find pdf files, where results should be written
* Parsing of the command line arguments: where to find pdf files, where
* results should be written
*
* @param args
*/
public
void
readargs
(
String
[]
args
)
{
...
...
@@ -141,13 +149,17 @@ public class SciDetect_Local {
}
if
(
args
[
i
].
equals
(
"-d"
))
{
savedetaillog
=
true
;
}
if
(
args
[
i
].
equals
(
"-noclean"
))
{
clean
=
false
;
}
if
(
args
[
i
].
equals
(
"-h"
))
{
printUsage
();
}
}
}
else
{
printUsage
();}
}
else
{
printUsage
();
}
}
/**
...
...
@@ -161,14 +173,15 @@ public class SciDetect_Local {
System
.
out
.
println
(
"java -jar SciDetect_local.jar -h \n"
);
System
.
out
.
println
(
"***** \n"
);
}
/**
* This is the standalone checker. All pdf files in the dir specified after -c are
* checked against classes found in the dir "data". Results are written in the log
* file specified by the -l option. If -d is given a detailled log is produced.
* Example: testing all pdf files in a directory MyConf/PDF/ and having results
* in the MyConf/checklog.txt:
* java -jar ScigenChecker_local.jar -l MyConf/checklog.txt -c MyConf/PDF/
* This is the standalone checker. All pdf files in the dir specified after
* -c are checked against classes found in the dir "data". Results are
* written in the log file specified by the -l option. If -d is given a
* detailed log is produced. Example: testing all pdf files in a directory
* MyConf/PDF/ and having results in the MyConf/checklog.txt: java -jar
* ScigenChecker_local.jar -l MyConf/checklog.txt -c MyConf/PDF/
*
* @param args the command line arguments
* @throws java.io.IOException
*/
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment