diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java index d21a58395..63db12d17 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/CleaningFunctions.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; public class CleaningFunctions { - public static final String DOI_PREFIX_REGEX = "^.*10\\."; + public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"; public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; @@ -277,7 +277,7 @@ public class CleaningFunctions { // TODO add cleaning for more PID types as needed case "doi": - pid.setValue(value.toLowerCase().replaceAll(DOI_URL_PREFIX_REGEX, "")); + pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10.")); break; } return pid; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java index 66f843056..d1dc1f8c3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java @@ -9,9 +9,9 @@ import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.AccessRightComparator; import org.apache.commons.lang3.StringUtils; -import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.utils.DHPUtils; @@ -327,10 +327,10 @@ public class OafMapperUtils { protected static Qualifier getBestAccessRights(final List instanceList) { if (instanceList != null) { - final Optional min = instanceList + final Optional min = instanceList .stream() .map(i -> i.getAccessright()) - .min(new LicenseComparator()); + .min(new AccessRightComparator<>()); final Qualifier rights = min.isPresent() ? min.get() : new Qualifier(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index fd90ee126..7df4f79d3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -8,6 +8,11 @@ import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; +import com.google.common.collect.HashBiMap; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.KeyValue; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.schema.oaf.CleaningFunctions; @@ -30,6 +35,13 @@ public class IdentifierFactory implements Serializable { public static final int ID_PREFIX_LEN = 12; + public static final HashBiMap PID_AUTHORITY = HashBiMap.create(2); + + static { + PID_AUTHORITY.put(ModelConstants.CROSSREF_ID, "Crossref"); + PID_AUTHORITY.put(ModelConstants.DATACITE_ID, "Datacite"); + } + /** * Creates an identifier from the most relevant PID (if available) in the given entity T. Returns entity.id * when no PID is available @@ -43,6 +55,14 @@ public class IdentifierFactory implements Serializable { return entity.getId(); } + if (Optional.ofNullable( + entity.getCollectedfrom()) + .map(c -> c.stream() + .noneMatch(cf -> PID_AUTHORITY.containsKey(cf.getKey()) || PID_AUTHORITY.containsValue(cf.getValue()))) + .orElse(true)) { + return entity.getId(); + } + Map> pids = entity .getPid() .stream() diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java index 17f172a42..14a5cf7df 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java @@ -23,7 +23,7 @@ public class IdentifierFactoryTest { public void testCreateIdentifierForPublication() throws IOException { verifyIdentifier( - "publication_doi1.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013"), true); + "publication_doi1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false); verifyIdentifier( "publication_doi2.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2010.03.013"), true); verifyIdentifier("publication_pmc1.json", "50|pmc_________::" + DHPUtils.md5("21459329"), true); diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json index 7cc27f440..f2b53aa28 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/publication_doi2.json @@ -1 +1 @@ -{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2010.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}]} \ No newline at end of file +{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2010.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}]}