Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
scidetect
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Cyril Labbe
scidetect
Commits
ab8af14c
Commit
ab8af14c
authored
Mar 18, 2015
by
Tien
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
brought back some Java doc from mistake delete.
parent
b337c6ba
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
65 additions
and
29 deletions
+65
-29
src/fr/imag/forge/scidetect/Checker/Classifier.java
src/fr/imag/forge/scidetect/Checker/Classifier.java
+8
-8
src/fr/imag/forge/scidetect/Checker/DistantCalculator.java
src/fr/imag/forge/scidetect/Checker/DistantCalculator.java
+16
-9
src/fr/imag/forge/scidetect/Checker/Indexer.java
src/fr/imag/forge/scidetect/Checker/Indexer.java
+3
-3
src/fr/imag/forge/scidetect/Checker/Utils/DistancesSet.java
src/fr/imag/forge/scidetect/Checker/Utils/DistancesSet.java
+21
-5
src/fr/imag/forge/scidetect/Checker/Utils/ThresholdsSet.java
src/fr/imag/forge/scidetect/Checker/Utils/ThresholdsSet.java
+16
-3
src/fr/imag/forge/scidetect/TextExtractor/pdfextractor.java
src/fr/imag/forge/scidetect/TextExtractor/pdfextractor.java
+1
-1
No files found.
src/fr/imag/forge/scidetect/Checker/Classifier.java
View file @
ab8af14c
...
...
@@ -37,7 +37,7 @@ public class Classifier {
private
ThresholdsSet
SetOfThresholds
;
/**
*
* Build a new classifier, thresholds are read in the configuration file
*/
public
Classifier
()
{
this
.
SetOfThresholds
=
new
ThresholdsSet
();
...
...
@@ -86,10 +86,10 @@ public class Classifier {
*
* @param result a string composed having for each classes the value of its
* NN
* @return
* @return a string composed of the classes and the distances to the nearest neighbor in each class.
*/
private
String
checkdistant
(
String
result
)
{
String
conclu
t
ion
=
""
;
String
conclu
s
ion
=
""
;
String
[]
eachtype
=
result
.
split
(
"\n"
);
for
(
int
i
=
0
;
i
<
eachtype
.
length
;
i
++)
{
...
...
@@ -105,16 +105,16 @@ public class Classifier {
}
//check distant with threshold
if
(
Double
.
parseDouble
(
eachNN
[
1
])
<
threshold
[
0
])
{
conclu
t
ion
+=
"is a "
+
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
conclu
s
ion
+=
"is a "
+
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
}
else
if
(
Double
.
parseDouble
(
eachNN
[
1
])
<
threshold
[
1
])
{
conclu
t
ion
+=
"is suppected "
+
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
conclu
s
ion
+=
"is suppected "
+
eachNN
[
0
]
+
"\t"
+
eachNN
[
1
]
+
"\t"
+
eachNN
[
2
]
+
"\n"
;
}
}
if
(
conclu
t
ion
==
""
)
{
conclu
t
ion
=
findmindistant
(
result
);
if
(
conclu
s
ion
==
""
)
{
conclu
s
ion
=
findmindistant
(
result
);
}
return
conclu
t
ion
;
return
conclu
s
ion
;
}
/**
...
...
src/fr/imag/forge/scidetect/Checker/DistantCalculator.java
View file @
ab8af14c
...
...
@@ -24,38 +24,45 @@ import fr.imag.forge.scidetect.Checker.Utils.DistancesSet;
import
fr.imag.forge.scidetect.Corpus.Corpus
;
/**
* Compute distances between two sets of texts
*
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
*/
public
class
DistantCalculator
{
//private HashMap<String, HashMap<String, Double>> distant = new HashMap<String, HashMap<String, Double>>();
private
DistancesSet
distant
=
new
DistancesSet
();
private
DistancesSet
distant
=
new
DistancesSet
();
/**
*calculate the distant between 2 text corpus
* Compute distances between each text of a corpus and the samples
*
* @param samples corpus
* @param tests corpus
* @return DistancesSet from test to sample
* @return DistancesSet distances from text in test to text in sample
*/
public
DistancesSet
caldistant
(
Corpus
samples
,
Corpus
tests
)
{
for
(
String
key
:
tests
.
keySet
())
{
//HashMap<String, Double> distantto = new HashMap<String, Double>();
for
(
String
key2
:
samples
.
keySet
())
{
double
distanttt
=
cal_textdistant
(
tests
.
get
(
key
),
samples
.
get
(
key2
));
// System.out.println("distant between " + key + " and " + key2
double
distanttt
=
cal_textdistant
(
tests
.
get
(
key
),
samples
.
get
(
key2
));
// System.out.println("distant between " + key + " and " + key2
// + ": " + distanttt);
//distantto.put(key2, distanttt);
distant
.
setDist
(
key
,
key2
,
distanttt
);
distant
.
setDist
(
key
,
key2
,
distanttt
);
}
//distant.put(key, distantto);
}
return
distant
;
}
/**
*Calculate distant between 2 text index
*/
/**
* Compute the distance between 2 texts index
*
* @param text1
* @param text2
* @return the distance between text 1 and text 2.
*/
private
double
cal_textdistant
(
HashMap
<
String
,
Integer
>
text1
,
HashMap
<
String
,
Integer
>
text2
)
{
double
nboftoken
=
0.0
;
...
...
src/fr/imag/forge/scidetect/Checker/Indexer.java
View file @
ab8af14c
...
...
@@ -22,7 +22,7 @@ import java.io.PrintWriter;
import
java.util.HashMap
;
/**
*
* Index texts (i.e. for each word computes its occurrence number)
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
*/
public
class
Indexer
{
...
...
@@ -30,8 +30,8 @@ public class Indexer {
private
Object
content
;
/**
* Index a text (count how many time each word appeared)
* and write to file under then name INDEX-filename.txt
* Index a text (count how many time each word is appearing)
* and writes results in a file INDEX-filename.txt
* @param content
* @param textfile
* @throws FileNotFoundException
...
...
src/fr/imag/forge/scidetect/Checker/Utils/DistancesSet.java
View file @
ab8af14c
/*
* Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package
fr.imag.forge.scidetect.Checker.Utils
;
import
java.util.HashMap
;
...
...
@@ -18,8 +34,8 @@ public class DistancesSet extends HashMap<String, HashMap<String, Double>>{
/**
* Get the value of the distance between A and B
* @param A
* @param B
* @param A text
* @param B text
* @return the distance between A and B
*/
public
Double
getDist
(
String
A
,
String
B
){
...
...
@@ -28,9 +44,9 @@ public class DistancesSet extends HashMap<String, HashMap<String, Double>>{
/**
* Set the distance between A and B to the value d
* @param A
* @param B
* @param d
* @param A text
* @param B text
* @param d distant
*/
public
void
setDist
(
String
A
,
String
B
,
Double
d
){
if
(
this
.
get
(
A
)
==
null
)
{
this
.
put
(
A
,
new
HashMap
<
String
,
Double
>());}
...
...
src/fr/imag/forge/scidetect/Checker/Utils/ThresholdsSet.java
View file @
ab8af14c
/*
* Copyright (C) 2015 UNIVERSITE JOSEPH FOURIER (Grenoble 1)/ Springer-Verlag GmbH
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package
fr.imag.forge.scidetect.Checker.Utils
;
import
java.io.BufferedReader
;
...
...
@@ -18,8 +33,6 @@ public class ThresholdsSet extends HashMap<String, Double[]> {
/**
* Initialize the thresholds Set by reading the configuration file
* @throws FileNotFoundException
* @throws IOException
*/
public
void
Init
()
{
try
{
...
...
src/fr/imag/forge/scidetect/TextExtractor/pdfextractor.java
View file @
ab8af14c
...
...
@@ -27,7 +27,7 @@ import org.apache.pdfbox.pdmodel.PDDocument;
import
org.apache.pdfbox.util.PDFTextStripper
;
/**
* extract raw txt from a pdf File
* Extract raw txt from a pdf File
* @author Nguyen Minh Tien - minh-tien.nguyen@imag.fr
*/
public
class
pdfextractor
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment