diff --git a/src/main/java/fr/osug/doi/Const.java b/src/main/java/fr/osug/doi/Const.java
index 0e44f5dd2d9f7f7f65fc7a46649ec9d8d74df3b1..95bbaf1ccea483c8bbec4f546f51008685849717 100644
--- a/src/main/java/fr/osug/doi/Const.java
+++ b/src/main/java/fr/osug/doi/Const.java
@@ -7,15 +7,22 @@ package fr.osug.doi;
  *
  */
 public interface Const {
-    
-    /** datacite schema version '3.1' or '4.0' */
-    public final static String SCHEMA_VERSION = "3.1";
-    
+
+    public final static String SCHEMA_VERSION_3_1 = "3.1";
+    public final static String SCHEMA_VERSION_4_1 = "4.1";
+
+    public final static float F_SCHEMA_VERSION_3_1 = Float.valueOf(SCHEMA_VERSION_3_1);
+    public final static float F_SCHEMA_VERSION_4_1 = Float.valueOf(SCHEMA_VERSION_4_1);
+
+    /** datacite schema version '3.1' or '4.1' */
+    public final static String SCHEMA_VERSION = SCHEMA_VERSION_3_1;
+    public final static float F_SCHEMA_VERSION = Float.valueOf(SCHEMA_VERSION);
+
     public final static String DOI_PREFIX_TEST = "10.5072";
 
     public final static char SEPARATOR = ';';
     public final static char COMMENT = '#';
-    
+
     public final static String FILE_EXT_CSV = ".csv";
     public final static String FILE_EXT_XML = ".xml";
 
@@ -25,20 +32,26 @@ public interface Const {
 
     public static final String KEY_CREATOR_NAME = "creatorName";
     public static final String KEY_CONTRIBUTOR_NAME = "contributorName";
-    
+    public static final String KEY_CONTRIBUTOR_FUNDER_NAME = "contributorName:Funder";
+    public static final String KEY_FUNDER_NAME = "funderName";
+
     public static final String KEY_REL_ID_START = "relatedIdentifier:";
     public static final String KEY_REL_ID_DOI = ":DOI";
 
     public static final String[] KEY_ORDER = new String[]{
         //# 1 [identifier]
-        "identifier", KEY_IDENTIFIER,
+        KEY_IDENTIFIER,
         //# 2 [creators]
         KEY_CREATOR_NAME,
+        // # v4.1
+        "creatorName:Organizational",
+        "creatorName:Personal",
         //# 3 [titles]
         KEY_TITLE,
         "title:AlternativeTitle",
         "title:Subtitle",
         "title:TranslatedTitle",
+        "title:Other",
         //# 4 [publisher]
         "publisher",
         // # 5 [publicationYear]
@@ -54,7 +67,8 @@ public interface Const {
         "contributorName:DataManager",
         "contributorName:Distributor",
         "contributorName:Editor",
-        "contributorName:Funder",
+        // Funder is deprecated in v4.0
+        KEY_CONTRIBUTOR_FUNDER_NAME,
         "contributorName:HostingInstitution",
         "contributorName:Other",
         "contributorName:Producer",
@@ -77,6 +91,7 @@ public interface Const {
         "date:Copyrighted",
         "date:Created",
         "date:Issued",
+        "date:Other",
         "date:Submitted",
         "date:Updated",
         "date:Valid",
@@ -84,15 +99,18 @@ public interface Const {
         "language",
         //# 10 [resourceType] (PARTIAL)
         "resourceType:Dataset",
+        "resourceType:DataPaper",
         "resourceType:Service",
         "resourceType:Software",
-        "resourceType:Text",
         "resourceType:Other",
         //# 11 [alternateIdentifiers] (IGNORED)
+        "alternateIdentifier:URL",
+        "alternateIdentifier:DOI",
         //# 12 [relatedIdentifiers]
         // URL:
         "relatedIdentifier:Cites:URL",
         "relatedIdentifier:References:URL",
+        "relatedIdentifier:HasPart:URL",
         // DOI:
         "relatedIdentifier:IsCitedBy:DOI",
         "relatedIdentifier:Cites:DOI",
@@ -132,28 +150,37 @@ public interface Const {
         "description:Abstract",
         "description:Methods",
         "description:SeriesInformation",
+        "description:TechnicalInfo",
         "description:TableOfContents",
         "description:Other",
         //# 18 [geoLocations]
         KEY_GEO_LOCATION_PLACE,
         "geoLocationPoint",
-        "geoLocationBox"
+        "geoLocationBox",
+        //# 19 [funder]
+        KEY_FUNDER_NAME
     };
     public static final String[] KEY_ATTRS = new String[]{
         // creator / contributor attributes:
         // nameIdentifier variants:
-//        "nameIdentifier:AUTHORCLAIM",
-//        "nameIdentifier:ISNI",
+        // "nameIdentifier:AUTHORCLAIM",
+        // "nameIdentifier:ISNI",
         "nameIdentifier:ORCID",
-//        "nameIdentifier:RESEARCHERID",
-//        "nameIdentifier:VIAF",
-//        "nameIdentifier:URL",
+        // "nameIdentifier:RESEARCHERID",
+        // "nameIdentifier:VIAF",
+        // "nameIdentifier:URL",
         // affiliation:
         "affiliation",
+        // givenName
+        // familyName
         // rights attribute:
-        "rightsURI"
+        "rightsURI",
+        // funder attributes:
+        // funderIdentifier
+        "awardNumber",
+        "awardTitle"
     };
-    
+
     public static final String[] KEY_IGNORE = new String[]{
         "Préfixe enregistrement doi",
         "Prénom Nom",
diff --git a/src/main/java/fr/osug/doi/DoiCsvData.java b/src/main/java/fr/osug/doi/DoiCsvData.java
index 2284a211a833a3ac94c0acaf34bf29577ac8a8bb..999a9678716c4c40cbb5d72a1607575bfb3c7e70 100644
--- a/src/main/java/fr/osug/doi/DoiCsvData.java
+++ b/src/main/java/fr/osug/doi/DoiCsvData.java
@@ -20,6 +20,7 @@ import java.util.Set;
 public final class DoiCsvData extends CsvData {
 
     static final Set<String> KEY_SET = new HashSet<String>(128);
+    static final Set<String> KEY_ATTR_SET = new HashSet<String>(32);
     static final Map<String, Integer> KEY_ORDER_INDEX = new HashMap<String, Integer>(128);
 
     static {
@@ -31,7 +32,7 @@ public final class DoiCsvData extends CsvData {
         }
         // supported attributes (extra CSV columns):
         for (String key : Const.KEY_ATTRS) {
-            KEY_SET.add(key);
+            KEY_ATTR_SET.add(key);
         }
     }
 
@@ -94,7 +95,7 @@ public final class DoiCsvData extends CsvData {
 
                     // Check keys:
                     if (i % 2 == 0) {
-                        if (!KEY_SET.contains(v)) {
+                        if (!KEY_SET.contains(v) && ((i == 0) || !KEY_ATTR_SET.contains(v))) {
                             if (!shouldIgnoreKey(v)) {
                                 logger.warn("Invalid key found [{}] for row: {}", v, Arrays.toString(cols));
                             }
diff --git a/src/main/java/fr/osug/doi/ProcessPipeline.java b/src/main/java/fr/osug/doi/ProcessPipeline.java
index 18897b9be348d9de3746c159caf4da2e8d81ed10..ba69c36c8eb8350f7bf651dd700143ee69c68dda 100644
--- a/src/main/java/fr/osug/doi/ProcessPipeline.java
+++ b/src/main/java/fr/osug/doi/ProcessPipeline.java
@@ -18,6 +18,7 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Date;
 import java.util.LinkedHashSet;
@@ -252,6 +253,9 @@ public final class ProcessPipeline extends AbstractPipeline<ProcessPipelineData>
             mergeCSV(projectConfig, data, doiData);
         }
 
+        // Upgrade metadata:
+        upgradeData(data, doiData);
+
         // Validate metadata:
         validateData(projectConfig, data, doiData);
 
@@ -322,54 +326,6 @@ public final class ProcessPipeline extends AbstractPipeline<ProcessPipelineData>
         }
     }
 
-    private void validateData(final ProjectConfig projectConfig, final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
-        logger.debug("validateData [{}]", doiData.getDoiId());
-
-        // check title
-        if (data.getTitle().isEmpty()) {
-            doiData.addWarning("Empty title.");
-        }
-
-        final List<String[]> rows = data.getRows();
-
-        // check all values:
-        for (ListIterator<String[]> it = rows.listIterator(); it.hasNext();) {
-            String[] cols = it.next();
-            if (cols != null && cols.length >= 2) {
-                if (cols[1].toLowerCase().contains("todo")) {
-                    doiData.addWarning("TODO detected for key [" + cols[0] + ']');
-                }
-
-                // Collect name references and override specific name entries:
-                final String key = cols[0];
-                if (key.startsWith(Const.KEY_CREATOR_NAME) || key.startsWith(Const.KEY_CONTRIBUTOR_NAME)) {
-                    final String name = cols[1];
-                    final String simpleName = ValidationUtil.simplifyName(name);
-
-                    // Check in override names:
-                    final NameEntry override = projectConfig.getOverrideNameEntry(simpleName);
-
-                    if (override != null) {
-                        logger.debug("override: {}", override);
-                        cols = NameEntry.toCsv(override);
-                        cols[0] = key;
-
-                        it.set(cols);
-                    }
-
-                    pipeData.addNameRef(cols);
-                }
-            }
-        }
-
-        final Set<String> refs = data.getReferences();
-        if (refs != null) {
-            logger.debug("References: {}", refs);
-        }
-        // anyway: add the doi entry:
-        pipeData.addRefs(doiData.getDoiId(), refs);
-    }
-
     private void saveData(final ProjectConfig projectConfig, final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
         // Sort keys:
         data.sort();
@@ -452,4 +408,91 @@ public final class ProcessPipeline extends AbstractPipeline<ProcessPipelineData>
             FileUtils.writeFile(xmlDoc, fileXML);
         }
     }
+
+    private void validateData(final ProjectConfig projectConfig, final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
+        logger.debug("validateData [{}]", doiData.getDoiId());
+
+        // check title
+        if (data.getTitle().isEmpty()) {
+            doiData.addWarning("Empty title.");
+        }
+
+        // check all values:
+        for (ListIterator<String[]> it = data.getRows().listIterator(); it.hasNext();) {
+            String[] cols = it.next();
+            if (cols != null && cols.length >= 2) {
+                if (cols[1].toLowerCase().contains("todo")) {
+                    doiData.addWarning("TODO detected for key [" + cols[0] + ']');
+                }
+
+                // Collect name references and override specific name entries:
+                final String key = cols[0];
+                if (key.startsWith(Const.KEY_CREATOR_NAME)
+                        || key.startsWith(Const.KEY_CONTRIBUTOR_NAME)
+                        || key.startsWith(Const.KEY_FUNDER_NAME)) {
+
+                    final String name = cols[1];
+                    final String simpleName = ValidationUtil.simplifyName(name);
+
+                    // Check in override names:
+                    final NameEntry override = projectConfig.getOverrideNameEntry(simpleName);
+
+                    if (override != null) {
+                        logger.debug("override: {}", override);
+                        cols = NameEntry.toCsv(override);
+                        cols[0] = key;
+
+                        it.set(cols);
+                    }
+
+                    pipeData.addNameRef(cols);
+                }
+            }
+        }
+
+        final Set<String> refs = data.getReferences();
+        if (refs != null) {
+            logger.debug("References: {}", refs);
+        }
+        // anyway: add the doi entry:
+        pipeData.addRefs(doiData.getDoiId(), refs);
+    }
+
+    /**
+     * Rewrites every 'contributorName:Funder' row as a 'funderName' row when the
+     * configured schema version is at least 4.1 (no-op while SCHEMA_VERSION is 3.1).
+     * Extra columns on such a row are dropped: only the key and the name value are kept.
+     *
+     * @param data CSV data whose rows are rewritten in place
+     * @param doiData DOI entry being processed (its id is only used in log messages)
+     * @throws IOException not thrown by this implementation (kept in the signature)
+     */
+    private void upgradeData(final DoiCsvData data, final ProcessPipelineDoiData doiData) throws IOException {
+        if (Const.F_SCHEMA_VERSION >= Const.F_SCHEMA_VERSION_4_1) {
+            logger.debug("upgradeData [{}] to schema {}", doiData.getDoiId(), Const.SCHEMA_VERSION);
+
+            for (ListIterator<String[]> it = data.getRows().listIterator(); it.hasNext();) {
+                String[] cols = it.next();
+                if (cols != null && cols.length >= 2) {
+                    final String key = cols[0];
+
+                    // Convert 'contributorName:Funder' (deprecated ContributorType=Funder) to 'funderName' (fundingReference)
+                    if (Const.KEY_CONTRIBUTOR_FUNDER_NAME.equals(key)) {
+                        if (cols.length > 2) {
+                            logger.warn("upgradeData [{}] : Complex Contributor::Funder = {}", doiData.getDoiId(), Arrays.toString(cols));
+                            // trim (optional attributes like affiliation or orcid)
+                            final String value = cols[1];
+                            cols = new String[2];
+                            cols[1] = value;
+                        }
+                        cols[0] = Const.KEY_FUNDER_NAME;
+
+                        logger.info("upgradeData [{}]: Fixed Funder = {}", doiData.getDoiId(), cols[1]);
+                        it.set(cols);
+                    }
+                }
+            }
+        }
+
+    }
 }
diff --git a/src/test/java/fr/osug/doi/DOITextToXmlTest.java b/src/test/java/fr/osug/doi/DOITextToXmlTest.java
index d7d898b53c21af9e411ee491e99039e057d25592..6aa8b0b1403834351e44ccc413f8b4b7b43a31a1 100644
--- a/src/test/java/fr/osug/doi/DOITextToXmlTest.java
+++ b/src/test/java/fr/osug/doi/DOITextToXmlTest.java
@@ -92,6 +92,7 @@ public class DOITextToXmlTest {
 
         // redirect outputs:
         projectConfig.initInputDir(DIR_TEST, true);
+        projectConfig.initMetadataDir(DIR_TEST + "xml/", true);
         projectConfig.initTmpDir(DIR_OUTPUT);
         projectConfig.initStagingDir(DIR_STAGING);
 