diff --git a/pom.xml b/pom.xml index e3d27b4..ed53320 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.4.7-SNAPSHOT + 2.6.14-SNAPSHOT @@ -262,6 +262,12 @@ ${dhp.commons.lang.version} + + com.github.sisyphsu + dateparser + 1.0.7 + + com.google.guava guava @@ -330,6 +336,11 @@ commons-lang3 + + com.github.sisyphsu + dateparser + + com.fasterxml.jackson.core jackson-databind diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index 06781c4..ddb80ee 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -8,6 +8,7 @@ public class ModelConstants { public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final String ORCID_DS = ORCID.toUpperCase(); public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"; public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"; @@ -34,7 +35,7 @@ public class ModelConstants { public static final String DNET_COUNTRY_TYPE = "dnet:countries"; public static final String DNET_REVIEW_LEVELS = "dnet:review_levels"; public static final String DNET_PROGRAMMING_LANGUAGES = "dnet:programming_languages"; - public static final String DNET_EXTERNAL_REF_TYPES = "dnet:externalReference_typologies"; + public static final String DNET_EXTERNAL_REFERENCE_TYPE = "dnet:externalReference_typologies"; public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository"; public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry"; diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 00d97ec..a6d164c 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -7,17 +7,15 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.ParseException; -import java.time.Instant; -import java.time.format.DateTimeFormatter; -import java.util.Date; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; +import java.time.format.DateTimeParseException; +import java.util.*; import java.util.function.Function; import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DateUtils; +import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Maps; import eu.dnetlib.dhp.schema.oaf.*; @@ -308,6 +306,8 @@ public class ModelSupport { private static final String schemeTemplate = "dnet:%s_%s_relations"; + public static final String DATE_FORMAT = "yyyy-MM-dd"; + private ModelSupport() { } @@ -501,12 +501,17 @@ public class ModelSupport { } if (StringUtils.isNotBlank(dateA) && StringUtils.isNotBlank(dateB)) { - final Date a = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateA))); - final Date b = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateB))); + final Date a = DateParserUtils.parseDate(dateA); + final Date b = DateParserUtils.parseDate(dateB); - return a.before(b) ? dateA : dateB; + if (Objects.nonNull(a) && Objects.nonNull(b)) { + return a.before(b) ? dateA : dateB; + } else { + return null; + } } else { return null; } } + } diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 945ebad..701f9ef 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; import java.util.Comparator; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; import eu.dnetlib.dhp.schema.common.AccessRightComparator; @@ -256,6 +257,14 @@ public class Result extends OafEntity implements Serializable { if (r.getLanguage() != null && compareTrust(this, r) < 0) language = r.getLanguage(); + if (Objects.nonNull(r.getDateofacceptance())) { + if (Objects.isNull(getDateofacceptance())) { + dateofacceptance = r.getDateofacceptance(); + } else if (compareTrust(this, r) < 0) { + dateofacceptance = r.getDateofacceptance(); + } + } + country = mergeLists(country, r.getCountry()); subject = mergeLists(subject, r.getSubject()); diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java index 5a297be..0a22e02 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java @@ -5,8 +5,58 @@ import org.apache.commons.lang3.EnumUtils; public enum PidType { - // Result - doi, pmid, pmc, handle, arXiv, nct, pdb, + /** + * The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash. + * + * There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix. + * + * The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters + * of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be + * defined for an application by the ISO 26324 Registration Authority. + * + * + * DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code. + * These two components shall be separated by a full stop (period). The directory indicator shall be "10" and + * distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the + * resolution system. + * + * Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a + * unique string assigned to a registrant. + * + * DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant. + * Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number, + * or it might incorporate an identifier generated from or based on another system used by the registrant + * (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be + * specified, as in Example 1). + * + * Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2 + */ + doi, + + /** + * PubMed Unique Identifier (PMID) + * + * This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the + * accession number for managing and disseminating records. PMIDs are not reused after records are deleted. + * + * Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions + * (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID is not displayed + * on the MEDLINE format. + * + * View the citation in abstract format in PubMed to access additional versions when available (see the article in + * the Jan-Feb 2012 NLM Technical Bulletin). + * + * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid + */ + pmid, + + /** + * This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the + * prefix PMC. + * + * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc + */ + pmc, handle, arXiv, nct, pdb, // Organization openorgs, corda, corda_h2020, GRID, mag_id, urn, diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java index 98baa1e..f9e70c7 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java @@ -37,8 +37,8 @@ public class MergeTest { @Test public void mergePublicationCollectedFromTest() { - Publication a = new Publication(); - Publication b = new Publication(); + Publication a = publication(); + Publication b = publication(); a.setCollectedfrom(Arrays.asList(setKV("a", "open"), setKV("b", "closed"))); b.setCollectedfrom(Arrays.asList(setKV("A", "open"), setKV("b", "Open"))); @@ -49,11 +49,140 @@ public class MergeTest { assertEquals(3, a.getCollectedfrom().size()); } + @Test + public void mergePublicationDateOfAcceptanceTest_bothPresent() { + + Publication a = publication(); + Publication b = publication(); + + a.setDateofacceptance(field("2021-06-18")); + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-18", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_bothPresent_1() { + + Publication a = publication("0.8"); + Publication b = publication("0.9"); + + a.setDateofacceptance(field("2021-06-18")); + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_bothPresent_2() { + + Publication a = publication("0.9"); + Publication b = publication("0.8"); + + a.setDateofacceptance(field("2021-06-18")); + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-18", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_leftMissing() { + + Publication a = publication(); + Publication b = publication(); + + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_leftMissing_1() { + + Publication a = publication("0.9"); + Publication b = publication("0.8"); + + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_leftMissing_2() { + + Publication a = publication("0.8"); + Publication b = publication("0.9"); + + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_rightMissing() { + + Publication a = publication(); + Publication b = publication(); + + a.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_rightMissing_1() { + + Publication a = publication("0.8"); + Publication b = publication("0.9"); + + a.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_rightMissing_2() { + + Publication a = publication("0.9"); + Publication b = publication("0.8"); + + a.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + @Test public void mergePublicationSubjectTest() { - Publication a = new Publication(); - Publication b = new Publication(); + Publication a = publication(); + Publication b = publication(); a.setSubject(Arrays.asList(setSP("a", "open", "classe"), setSP("b", "open", "classe"))); b.setSubject(Arrays.asList(setSP("A", "open", "classe"), setSP("c", "open", "classe"))); @@ -91,13 +220,25 @@ public class MergeTest { b = createRel(true, "2016-04-05T12:41:19.202Z"); a.mergeFrom(b); assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate()); + + a = createRel(true, "2020-09-10 11:08:52"); + b = createRel(true, "2021-09-10 11:08:52"); + a.mergeFrom(b); + assertEquals("2020-09-10 11:08:52", a.getValidationDate()); + + a = createRel(true, "2021-03-16T10:32:42Z"); + b = createRel(true, "2020-03-16T10:32:42Z"); + a.mergeFrom(b); + assertEquals("2020-03-16T10:32:42Z", a.getValidationDate()); + + } @Test public void mergeRelationTestParseException() { assertThrows(DateTimeParseException.class, () -> { - Relation a = createRel(true, "2016-04-05"); - Relation b = createRel(true, "2016-04-05"); + Relation a = createRel(true, "Once upon a time ..."); + Relation b = createRel(true, "... in a far away land"); a.mergeFrom(b); }); } @@ -136,4 +277,28 @@ public class MergeTest { s.setQualifier(q); return s; } + + private Field field(T value) { + Field f = new Field(); + f.setValue(value); + return f; + } + + private Publication publication() { + Publication p = new Publication(); + p.setDataInfo(df("0.9")); + return p; + } + + private Publication publication(String trust) { + Publication p = new Publication(); + p.setDataInfo(df(trust)); + return p; + } + + private DataInfo df(String trust) { + DataInfo d = new DataInfo(); + d.setTrust(trust); + return d; + } }