Commit 6793c426 authored by bourgesl's avatar bourgesl
Browse files

preliminary support for schema 4.1 (keys / attribute) + funder migration

parent 53e0f995
......@@ -7,15 +7,22 @@ package fr.osug.doi;
*
*/
public interface Const {
/** datacite schema version '3.1' or '4.0' */
public final static String SCHEMA_VERSION = "3.1";
public final static String SCHEMA_VERSION_3_1 = "3.1";
public final static String SCHEMA_VERSION_4_1 = "4.1";
public final static float F_SCHEMA_VERSION_3_1 = Float.valueOf(SCHEMA_VERSION_3_1);
public final static float F_SCHEMA_VERSION_4_1 = Float.valueOf(SCHEMA_VERSION_4_1);
/** datacite schema version '3.1' or '4.1' */
public final static String SCHEMA_VERSION = SCHEMA_VERSION_3_1;
public final static float F_SCHEMA_VERSION = Float.valueOf(SCHEMA_VERSION);
public final static String DOI_PREFIX_TEST = "10.5072";
public final static char SEPARATOR = ';';
public final static char COMMENT = '#';
public final static String FILE_EXT_CSV = ".csv";
public final static String FILE_EXT_XML = ".xml";
......@@ -25,20 +32,26 @@ public interface Const {
public static final String KEY_CREATOR_NAME = "creatorName";
public static final String KEY_CONTRIBUTOR_NAME = "contributorName";
public static final String KEY_CONTRIBUTOR_FUNDER_NAME = "contributorName:Funder";
public static final String KEY_FUNDER_NAME = "funderName";
public static final String KEY_REL_ID_START = "relatedIdentifier:";
public static final String KEY_REL_ID_DOI = ":DOI";
public static final String[] KEY_ORDER = new String[]{
//# 1 [identifier]
"identifier", KEY_IDENTIFIER,
KEY_IDENTIFIER,
//# 2 [creators]
KEY_CREATOR_NAME,
// # v4.1
"creatorName:Organizational",
"creatorName:Personal",
//# 3 [titles]
KEY_TITLE,
"title:AlternativeTitle",
"title:Subtitle",
"title:TranslatedTitle",
"title:Other",
//# 4 [publisher]
"publisher",
// # 5 [publicationYear]
......@@ -54,7 +67,8 @@ public interface Const {
"contributorName:DataManager",
"contributorName:Distributor",
"contributorName:Editor",
"contributorName:Funder",
// Funder is deprecated in v4.0
KEY_CONTRIBUTOR_FUNDER_NAME,
"contributorName:HostingInstitution",
"contributorName:Other",
"contributorName:Producer",
......@@ -77,6 +91,7 @@ public interface Const {
"date:Copyrighted",
"date:Created",
"date:Issued",
"date:Other",
"date:Submitted",
"date:Updated",
"date:Valid",
......@@ -84,15 +99,18 @@ public interface Const {
"language",
//# 10 [resourceType] (PARTIAL)
"resourceType:Dataset",
"resourceType:DataPaper",
"resourceType:Service",
"resourceType:Software",
"resourceType:Text",
"resourceType:Other",
//# 11 [alternateIdentifiers] (IGNORED)
"alternateIdentifier:URL",
"alternateIdentifier:DOI",
//# 12 [relatedIdentifiers]
// URL:
"relatedIdentifier:Cites:URL",
"relatedIdentifier:References:URL",
"relatedIdentifier:HasPart:URL",
// DOI:
"relatedIdentifier:IsCitedBy:DOI",
"relatedIdentifier:Cites:DOI",
......@@ -132,28 +150,37 @@ public interface Const {
"description:Abstract",
"description:Methods",
"description:SeriesInformation",
"description:TechnicalInfo",
"description:TableOfContents",
"description:Other",
//# 18 [geoLocations]
KEY_GEO_LOCATION_PLACE,
"geoLocationPoint",
"geoLocationBox"
"geoLocationBox",
//# 19 [funder]
KEY_FUNDER_NAME
};
public static final String[] KEY_ATTRS = new String[]{
// creator / contributor attributes:
// nameIdentifier variants:
// "nameIdentifier:AUTHORCLAIM",
// "nameIdentifier:ISNI",
// "nameIdentifier:AUTHORCLAIM",
// "nameIdentifier:ISNI",
"nameIdentifier:ORCID",
// "nameIdentifier:RESEARCHERID",
// "nameIdentifier:VIAF",
// "nameIdentifier:URL",
// "nameIdentifier:RESEARCHERID",
// "nameIdentifier:VIAF",
// "nameIdentifier:URL",
// affiliation:
"affiliation",
// givenName
// familyName
// rights attribute:
"rightsURI"
"rightsURI",
// funder attributes:
// funderIdentifier
"awardNumber",
"awardTitle"
};
public static final String[] KEY_IGNORE = new String[]{
"Préfixe enregistrement doi",
"Prénom Nom",
......
......@@ -20,6 +20,7 @@ import java.util.Set;
public final class DoiCsvData extends CsvData {
static final Set<String> KEY_SET = new HashSet<String>(128);
static final Set<String> KEY_ATTR_SET = new HashSet<String>(32);
static final Map<String, Integer> KEY_ORDER_INDEX = new HashMap<String, Integer>(128);
static {
......@@ -31,7 +32,7 @@ public final class DoiCsvData extends CsvData {
}
// supported attributes (extra CSV columns):
for (String key : Const.KEY_ATTRS) {
KEY_SET.add(key);
KEY_ATTR_SET.add(key);
}
}
......@@ -94,7 +95,7 @@ public final class DoiCsvData extends CsvData {
// Check keys:
if (i % 2 == 0) {
if (!KEY_SET.contains(v)) {
if (!KEY_SET.contains(v) && ((i == 0) || !KEY_ATTR_SET.contains(v))) {
if (!shouldIgnoreKey(v)) {
logger.warn("Invalid key found [{}] for row: {}", v, Arrays.toString(cols));
}
......
......@@ -18,6 +18,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashSet;
......@@ -252,6 +253,9 @@ public final class ProcessPipeline extends AbstractPipeline<ProcessPipelineData>
mergeCSV(projectConfig, data, doiData);
}
// Upgrade metadata:
upgradeData(data, doiData);
// Validate metadata:
validateData(projectConfig, data, doiData);
......@@ -322,54 +326,6 @@ public final class ProcessPipeline extends AbstractPipeline<ProcessPipelineData>
}
}
/**
 * Validates the metadata rows of one DOI entry and collects its name references.
 * <ul>
 * <li>adds a warning on {@code doiData} when the title is empty;</li>
 * <li>adds a warning for every row whose value contains "todo" (lower-cased comparison);</li>
 * <li>for creator / contributor rows, replaces the row by the project's override name entry
 * (when one matches the simplified name) and registers the row as a name reference;</li>
 * <li>finally registers the references of this DOI in the pipeline data.</li>
 * </ul>
 *
 * @param projectConfig project configuration giving the override name entries
 * @param data CSV rows to check (rows may be replaced in place)
 * @param doiData per-DOI state receiving the warnings
 * @throws IOException declared by the signature
 */
private void validateData(final ProjectConfig projectConfig, final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
    logger.debug("validateData [{}]", doiData.getDoiId());

    // check title
    if (data.getTitle().isEmpty()) {
        doiData.addWarning("Empty title.");
    }

    // check all values (a list iterator is needed to replace rows in place):
    final ListIterator<String[]> rowIt = data.getRows().listIterator();
    while (rowIt.hasNext()) {
        String[] row = rowIt.next();

        // skip rows without a (key, value) pair:
        if (row == null || row.length < 2) {
            continue;
        }

        final String key = row[0];

        if (row[1].toLowerCase().contains("todo")) {
            doiData.addWarning("TODO detected for key [" + key + ']');
        }

        // Only creator / contributor rows carry name references:
        final boolean nameRow = key.startsWith(Const.KEY_CREATOR_NAME) || key.startsWith(Const.KEY_CONTRIBUTOR_NAME);
        if (!nameRow) {
            continue;
        }

        // Check in override names (looked up by simplified name):
        final String simpleName = ValidationUtil.simplifyName(row[1]);
        final NameEntry override = projectConfig.getOverrideNameEntry(simpleName);

        if (override != null) {
            logger.debug("override: {}", override);
            // swap in the override row but keep the original key:
            row = NameEntry.toCsv(override);
            row[0] = key;
            rowIt.set(row);
        }
        pipeData.addNameRef(row);
    }

    final Set<String> references = data.getReferences();
    if (references != null) {
        logger.debug("References: {}", references);
    }
    // anyway: add the doi entry (even when there is no reference set):
    pipeData.addRefs(doiData.getDoiId(), references);
}
private void saveData(final ProjectConfig projectConfig, final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
// Sort keys:
data.sort();
......@@ -452,4 +408,82 @@ public final class ProcessPipeline extends AbstractPipeline<ProcessPipelineData>
FileUtils.writeFile(xmlDoc, fileXML);
}
}
/**
 * Validates the metadata rows of one DOI entry and collects its name references.
 * <p>
 * Warnings are added on {@code doiData} for an empty title and for every value containing
 * "todo" (lower-cased comparison). Rows whose key starts with the creator, contributor or
 * funder name key are matched against the project's override name entries: on a match the
 * row is replaced in place (the original key is kept), and in all cases the resulting row is
 * registered as a name reference. The references of the DOI are registered last.
 *
 * @param projectConfig project configuration giving the override name entries
 * @param data CSV rows to check (rows may be replaced in place)
 * @param doiData per-DOI state receiving the warnings
 * @throws IOException declared by the signature
 */
private void validateData(final ProjectConfig projectConfig, final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
    logger.debug("validateData [{}]", doiData.getDoiId());

    // check title
    if (data.getTitle().isEmpty()) {
        doiData.addWarning("Empty title.");
    }

    // check all values (rows may be replaced through the list iterator):
    final ListIterator<String[]> rowIt = data.getRows().listIterator();
    while (rowIt.hasNext()) {
        String[] row = rowIt.next();

        // ignore rows that do not hold at least a key and a value:
        if (row == null || row.length < 2) {
            continue;
        }

        final String key = row[0];

        if (row[1].toLowerCase().contains("todo")) {
            doiData.addWarning("TODO detected for key [" + key + ']');
        }

        // Name references only exist on creator / contributor / funder rows:
        final boolean nameRow = key.startsWith(Const.KEY_CREATOR_NAME)
                || key.startsWith(Const.KEY_CONTRIBUTOR_NAME)
                || key.startsWith(Const.KEY_FUNDER_NAME);
        if (!nameRow) {
            continue;
        }

        // Check in override names (looked up by simplified name):
        final NameEntry override = projectConfig.getOverrideNameEntry(ValidationUtil.simplifyName(row[1]));

        if (override != null) {
            logger.debug("override: {}", override);
            // use the override columns but restore the original key:
            row = NameEntry.toCsv(override);
            row[0] = key;
            rowIt.set(row);
        }
        pipeData.addNameRef(row);
    }

    final Set<String> references = data.getReferences();
    if (references != null) {
        logger.debug("References: {}", references);
    }
    // anyway: add the doi entry (even when there is no reference set):
    pipeData.addRefs(doiData.getDoiId(), references);
}
/**
 * Upgrades the metadata rows to the configured schema version.
 * <p>
 * Does nothing unless {@code Const.F_SCHEMA_VERSION} is at least
 * {@code Const.F_SCHEMA_VERSION_4_1}. Otherwise every row whose key equals
 * {@code Const.KEY_CONTRIBUTOR_FUNDER_NAME} is converted to a {@code Const.KEY_FUNDER_NAME}
 * row. A funder row carrying extra columns is reduced to (key, value), and the dropped
 * content is reported with a warning.
 *
 * @param data CSV rows to upgrade (matching rows are replaced in place)
 * @param doiData per-DOI state, used here for its DOI identifier in log messages
 * @throws IOException declared by the signature; nothing in this body visibly throws it
 */
private void upgradeData(final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
    if (Const.F_SCHEMA_VERSION >= Const.F_SCHEMA_VERSION_4_1) {
        logger.debug("upgradeData [{}] to schema {}", doiData.getDoiId(), Const.SCHEMA_VERSION);

        for (ListIterator<String[]> it = data.getRows().listIterator(); it.hasNext();) {
            String[] cols = it.next();

            if (cols != null && cols.length >= 2) {
                final String key = cols[0];

                // Convert 'contributorName:Funder' (deprecated ContributorType=Funder) to 'funderName' (fundingReference)
                if (Const.KEY_CONTRIBUTOR_FUNDER_NAME.equals(key)) {
                    if (cols.length > 2) {
                        // Fix: the message has two placeholders, so pass the DOI id first and the
                        // row dump second (the row was previously bound to the DOI id slot).
                        logger.warn("upgradeData [{}] : Complex Contributor::Funder = {}", doiData.getDoiId(), Arrays.toString(cols));
                        // trim (optional attributes like affiliation or orcid)
                        final String value = cols[1];
                        cols = new String[2];
                        cols[1] = value;
                    }
                    cols[0] = Const.KEY_FUNDER_NAME;

                    logger.info("upgradeData [{}]: Fixed Funder = {}", doiData.getDoiId(), cols[1]);
                    it.set(cols);
                }
            }
        }
    }
}
}
......@@ -92,6 +92,7 @@ public class DOITextToXmlTest {
// redirect outputs:
projectConfig.initInputDir(DIR_TEST, true);
projectConfig.initMetadataDir(DIR_TEST + "xml/", true);
projectConfig.initTmpDir(DIR_OUTPUT);
projectConfig.initStagingDir(DIR_STAGING);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment