From db618fe0c36055a47822880559ec0af9101bbc0d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 20 May 2021 15:45:52 +0200 Subject: [PATCH 01/24] uniform constant names across the branches --- src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index 06781c4..7cae122 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -34,7 +34,7 @@ public class ModelConstants { public static final String DNET_COUNTRY_TYPE = "dnet:countries"; public static final String DNET_REVIEW_LEVELS = "dnet:review_levels"; public static final String DNET_PROGRAMMING_LANGUAGES = "dnet:programming_languages"; - public static final String DNET_EXTERNAL_REF_TYPES = "dnet:externalReference_typologies"; + public static final String DNET_EXTERNAL_REFERENCE_TYPE = "dnet:externalReference_typologies"; public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository"; public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry"; From 679539525b6666d41683adcbce0c77a4390b5223 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 24 May 2021 08:57:31 +0200 Subject: [PATCH 02/24] [maven-release-plugin] prepare release dhp-schemas-2.4.7 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e3d27b4..02908b8 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.4.7-SNAPSHOT + 2.4.7 @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - HEAD + dhp-schemas-2.4.7 This module contains common schema classes meant to be used across the dnet-hadoop submodules From 1d9e909d2f09d00918524488fcd7fb63a40c8495 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 24 May 2021 08:57:37 +0200 Subject: [PATCH 03/24] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 02908b8..1f0df9d 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.4.7 + 2.4.8-SNAPSHOT @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - dhp-schemas-2.4.7 + HEAD This module contains common schema classes meant to be used across the dnet-hadoop submodules From acd28f94ccc67cb83f3c0b4482be47407e5dde61 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 26 May 2021 17:41:09 +0200 Subject: [PATCH 04/24] added constant for ORCID datasource name --- src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index 7cae122..ddb80ee 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -8,6 +8,7 @@ public class ModelConstants { public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final String ORCID_DS = ORCID.toUpperCase(); public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"; public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"; From 1b500a9d7ce8007eebd10c2ea01c19b047eab418 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 26 May 2021 17:41:51 +0200 Subject: [PATCH 05/24] [maven-release-plugin] prepare release dhp-schemas-2.4.8 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1f0df9d..5a55591 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.4.8-SNAPSHOT + 2.4.8 @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - HEAD + dhp-schemas-2.4.8 This module contains common schema classes meant to be used across the dnet-hadoop submodules From bf7acf268cb265d8cca0771b7b187665a38fbb3b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 26 May 2021 17:41:56 +0200 Subject: [PATCH 06/24] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 5a55591..b4692ee 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.4.8 + 2.4.9-SNAPSHOT @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - dhp-schemas-2.4.8 + HEAD This module contains common schema classes meant to be used across the dnet-hadoop submodules From e447ac68fac91198355fafc98d9a9fd6001fd12a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 May 2021 14:58:39 +0200 Subject: [PATCH 07/24] enable the possibility to extend the date formats used to parse Relation.validationDate --- .../dhp/schema/common/ModelSupport.java | 38 +++++++++++++++---- .../eu/dnetlib/dhp/schema/oaf/MergeTest.java | 8 +++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 00d97ec..03e80d6 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -9,15 +9,19 @@ import java.security.NoSuchAlgorithmException; import java.text.ParseException; import java.time.Instant; import java.time.format.DateTimeFormatter; -import java.util.Date; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; +import java.time.format.DateTimeParseException; +import java.time.temporal.ChronoField; +import java.time.temporal.TemporalAccessor; +import java.time.temporal.TemporalField; +import java.util.*; import java.util.function.Function; +import java.util.stream.Collectors; import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DateUtils; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import eu.dnetlib.dhp.schema.oaf.*; @@ -308,6 +312,11 @@ public class ModelSupport { private static final String schemeTemplate = "dnet:%s_%s_relations"; + public static final String[] DATE_TIME_FORMATS = { + "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", + "yyyy-MM-dd hh:mm:ss" + }; + private ModelSupport() { } @@ -501,12 +510,27 @@ public class ModelSupport { } if (StringUtils.isNotBlank(dateA) && StringUtils.isNotBlank(dateB)) { - final Date a = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateA))); - final Date b = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateB))); + final Date a = tryParse(dateA); + final Date b = tryParse(dateB); - return a.before(b) ? dateA : dateB; + if (Objects.nonNull(a) && Objects.nonNull(b)) { + return a.before(b) ? dateA : dateB; + } else { + return null; + } } else { return null; } } + + private static Date tryParse(String date) throws DateTimeParseException { + try { + return DateUtils.parseDate(date, DATE_TIME_FORMATS); + } catch (ParseException e) { + // ignore it, try another format + } + final String formats = String.join("\n", Arrays.asList(DATE_TIME_FORMATS)); + throw new DateTimeParseException(String.format("cannot parse %s, supported formats: \n%s", date, formats), date, 0); + } + } diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java index 98baa1e..ca57fab 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java @@ -91,12 +91,18 @@ public class MergeTest { b = createRel(true, "2016-04-05T12:41:19.202Z"); a.mergeFrom(b); assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate()); + + a = createRel(true, "2020-09-10 11:08:52"); + b = createRel(true, "2021-09-10 11:08:52"); + a.mergeFrom(b); + assertEquals("2020-09-10 11:08:52", a.getValidationDate()); + } @Test public void mergeRelationTestParseException() { assertThrows(DateTimeParseException.class, () -> { - Relation a = createRel(true, "2016-04-05"); + Relation a = createRel(true, "2016 April 05"); Relation b = createRel(true, "2016-04-05"); a.mergeFrom(b); }); From 9dbe397b1acb4e033c5ddb255e0f2d20c5a7de91 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 May 2021 15:02:26 +0200 Subject: [PATCH 08/24] bumped pom version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b4692ee..41042ab 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.4.9-SNAPSHOT + 2.5.9-SNAPSHOT From b1c95210efa4ea40ee5ba408f90112e590a0d997 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 May 2021 15:03:35 +0200 Subject: [PATCH 09/24] [maven-release-plugin] prepare release dhp-schemas-2.5.9 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 41042ab..053d348 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.9-SNAPSHOT + 2.5.9 @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - HEAD + dhp-schemas-2.5.9 This module contains common schema classes meant to be used across the dnet-hadoop submodules From 6cce7245a3cbe4f151ae1d5ea5acc5581864bd44 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 May 2021 15:03:41 +0200 Subject: [PATCH 10/24] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 053d348..291d761 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.9 + 2.5.10-SNAPSHOT @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - dhp-schemas-2.5.9 + HEAD This module contains common schema classes meant to be used across the dnet-hadoop submodules From 8e764c10fc2616d0b471bba53941542abf384dd6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 May 2021 17:28:34 +0200 Subject: [PATCH 11/24] added more date formats --- .../java/eu/dnetlib/dhp/schema/common/ModelSupport.java | 9 ++------- src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java | 6 ++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 03e80d6..990d719 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -7,21 +7,14 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.ParseException; -import java.time.Instant; -import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; -import java.time.temporal.ChronoField; -import java.time.temporal.TemporalAccessor; -import java.time.temporal.TemporalField; import java.util.*; import java.util.function.Function; -import java.util.stream.Collectors; import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; -import com.google.common.collect.Lists; import com.google.common.collect.Maps; import eu.dnetlib.dhp.schema.oaf.*; @@ -314,6 +307,8 @@ public class ModelSupport { public static final String[] DATE_TIME_FORMATS = { "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", + "yyyy-MM-dd'T'hh:mm:ss'Z'", + "yyyy-MM-dd hh:mm:ss'Z'", "yyyy-MM-dd hh:mm:ss" }; diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java index ca57fab..ac3e76e 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java @@ -97,6 +97,12 @@ public class MergeTest { a.mergeFrom(b); assertEquals("2020-09-10 11:08:52", a.getValidationDate()); + a = createRel(true, "2021-03-16T10:32:42Z"); + b = createRel(true, "2020-03-16T10:32:42Z"); + a.mergeFrom(b); + assertEquals("2020-03-16T10:32:42Z", a.getValidationDate()); + + } @Test From b39e0a2b7404bcdece5f0dc118c38b06193c2cf9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 May 2021 17:29:27 +0200 Subject: [PATCH 12/24] [maven-release-plugin] prepare release dhp-schemas-2.5.10 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 291d761..f5eb34f 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.10-SNAPSHOT + 2.5.10 @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - HEAD + dhp-schemas-2.5.10 This module contains common schema classes meant to be used across the dnet-hadoop submodules From 80f68327def899436c0657c2aac88f14584caee0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 27 May 2021 17:29:32 +0200 Subject: [PATCH 13/24] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index f5eb34f..468aac7 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.10 + 2.5.11-SNAPSHOT @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - dhp-schemas-2.5.10 + HEAD This module contains common schema classes meant to be used across the dnet-hadoop submodules From c9495f4b5dfd7693828296012dc6853350f0893f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 28 May 2021 10:43:08 +0200 Subject: [PATCH 14/24] added more date formats --- src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 990d719..ac2df6d 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -309,7 +309,8 @@ public class ModelSupport { "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "yyyy-MM-dd'T'hh:mm:ss'Z'", "yyyy-MM-dd hh:mm:ss'Z'", - "yyyy-MM-dd hh:mm:ss" + "yyyy-MM-dd hh:mm:ss", + "yyyy-MM-dd" }; private ModelSupport() { From 961f558ac6d06f6709b00fcefb3fb8074eec9c23 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 28 May 2021 10:45:10 +0200 Subject: [PATCH 15/24] [maven-release-plugin] prepare release dhp-schemas-2.5.11 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 468aac7..f12c28c 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.11-SNAPSHOT + 2.5.11 @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - HEAD + dhp-schemas-2.5.11 This module contains common schema classes meant to be used across the dnet-hadoop submodules From 7abc9d1d968b578f5471ea98b42054d7a89abc07 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 28 May 2021 10:45:16 +0200 Subject: [PATCH 16/24] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index f12c28c..644230c 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.11 + 2.5.12-SNAPSHOT @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - dhp-schemas-2.5.11 + HEAD This module contains common schema classes meant to be used across the dnet-hadoop submodules From 4328c3ca1983263016dd43194501957b8aaa95af Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 9 Jun 2021 12:29:24 +0200 Subject: [PATCH 17/24] delegating the date parsing to https://github.com/sisyphsu/dateparser --- pom.xml | 11 +++++++++ .../dhp/schema/common/ModelSupport.java | 23 ++++--------------- .../eu/dnetlib/dhp/schema/oaf/MergeTest.java | 4 ++-- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/pom.xml b/pom.xml index 644230c..93bef2b 100644 --- a/pom.xml +++ b/pom.xml @@ -262,6 +262,12 @@ ${dhp.commons.lang.version} + + com.github.sisyphsu + dateparser + 1.0.7 + + com.google.guava guava @@ -330,6 +336,11 @@ commons-lang3 + + com.github.sisyphsu + dateparser + + com.fasterxml.jackson.core jackson-databind diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index ac2df6d..a6d164c 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -15,6 +15,7 @@ import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; +import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Maps; import eu.dnetlib.dhp.schema.oaf.*; @@ -305,13 +306,7 @@ public class ModelSupport { private static final String schemeTemplate = "dnet:%s_%s_relations"; - public static final String[] DATE_TIME_FORMATS = { - "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", - "yyyy-MM-dd'T'hh:mm:ss'Z'", - "yyyy-MM-dd hh:mm:ss'Z'", - "yyyy-MM-dd hh:mm:ss", - "yyyy-MM-dd" - }; + public static final String DATE_FORMAT = "yyyy-MM-dd"; private ModelSupport() { } @@ -506,8 +501,8 @@ public class ModelSupport { } if (StringUtils.isNotBlank(dateA) && StringUtils.isNotBlank(dateB)) { - final Date a = tryParse(dateA); - final Date b = tryParse(dateB); + final Date a = DateParserUtils.parseDate(dateA); + final Date b = DateParserUtils.parseDate(dateB); if (Objects.nonNull(a) && Objects.nonNull(b)) { return a.before(b) ? dateA : dateB; @@ -519,14 +514,4 @@ public class ModelSupport { } } - private static Date tryParse(String date) throws DateTimeParseException { - try { - return DateUtils.parseDate(date, DATE_TIME_FORMATS); - } catch (ParseException e) { - // ignore it, try another format - } - final String formats = String.join("\n", Arrays.asList(DATE_TIME_FORMATS)); - throw new DateTimeParseException(String.format("cannot parse %s, supported formats: \n%s", date, formats), date, 0); - } - } diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java index ac3e76e..92bd92e 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java @@ -108,8 +108,8 @@ public class MergeTest { @Test public void mergeRelationTestParseException() { assertThrows(DateTimeParseException.class, () -> { - Relation a = createRel(true, "2016 April 05"); - Relation b = createRel(true, "2016-04-05"); + Relation a = createRel(true, "Once upon a time ..."); + Relation b = createRel(true, "... in a far away land"); a.mergeFrom(b); }); } From 48c7949946804680280e75e8f4f3a929d2be8de5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Jun 2021 16:53:59 +0200 Subject: [PATCH 18/24] [maven-release-plugin] prepare release dhp-schemas-2.5.12 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 93bef2b..849a2c3 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.12-SNAPSHOT + 2.5.12 @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - HEAD + dhp-schemas-2.5.12 This module contains common schema classes meant to be used across the dnet-hadoop submodules From 3b15fefa7b594ae10cf02a1cd82f629fe7e935af Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Jun 2021 16:54:04 +0200 Subject: [PATCH 19/24] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 849a2c3..ba822ef 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.12 + 2.5.13-SNAPSHOT @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - dhp-schemas-2.5.12 + HEAD This module contains common schema classes meant to be used across the dnet-hadoop submodules From f0e00d3abdd41564ee7deb7191b90567fa3015cf Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Jun 2021 09:57:57 +0200 Subject: [PATCH 20/24] WIP descriptions for pid types --- .../dnetlib/dhp/schema/oaf/utils/PidType.java | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java index 5a297be..0a22e02 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidType.java @@ -5,8 +5,58 @@ import org.apache.commons.lang3.EnumUtils; public enum PidType { - // Result - doi, pmid, pmc, handle, arXiv, nct, pdb, + /** + * The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash. + * + * There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix. + * + * The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters + * of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be + * defined for an application by the ISO 26324 Registration Authority. + * + * + * DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code. + * These two components shall be separated by a full stop (period). The directory indicator shall be "10" and + * distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the + * resolution system. + * + * Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a + * unique string assigned to a registrant. + * + * DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant. + * Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number, + * or it might incorporate an identifier generated from or based on another system used by the registrant + * (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be + * specified, as in Example 1). + * + * Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2 + */ + doi, + + /** + * PubMed Unique Identifier (PMID) + * + * This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the + * accession number for managing and disseminating records. PMIDs are not reused after records are deleted. + * + * Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions + * (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID is not displayed + * on the MEDLINE format. + * + * View the citation in abstract format in PubMed to access additional versions when available (see the article in + * the Jan-Feb 2012 NLM Technical Bulletin). + * + * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid + */ + pmid, + + /** + * This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the + * prefix PMC. + * + * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc + */ + pmc, handle, arXiv, nct, pdb, // Organization openorgs, corda, corda_h2020, GRID, mag_id, urn, From f5a3b451c9e504d29d5e600e02e40c5393e05726 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Jun 2021 09:58:30 +0200 Subject: [PATCH 21/24] Result.mergeFrom handles dateOfAcceptance --- .../eu/dnetlib/dhp/schema/oaf/Result.java | 9 + .../eu/dnetlib/dhp/schema/oaf/MergeTest.java | 161 +++++++++++++++++- 2 files changed, 166 insertions(+), 4 deletions(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 945ebad..701f9ef 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; import java.util.Comparator; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; import eu.dnetlib.dhp.schema.common.AccessRightComparator; @@ -256,6 +257,14 @@ public class Result extends OafEntity implements Serializable { if (r.getLanguage() != null && compareTrust(this, r) < 0) language = r.getLanguage(); + if (Objects.nonNull(r.getDateofacceptance())) { + if (Objects.isNull(getDateofacceptance())) { + dateofacceptance = r.getDateofacceptance(); + } else if (compareTrust(this, r) < 0) { + dateofacceptance = r.getDateofacceptance(); + } + } + country = mergeLists(country, r.getCountry()); subject = mergeLists(subject, r.getSubject()); diff --git a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java index 92bd92e..f9e70c7 100644 --- a/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ b/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java @@ -37,8 +37,8 @@ public class MergeTest { @Test public void mergePublicationCollectedFromTest() { - Publication a = new Publication(); - Publication b = new Publication(); + Publication a = publication(); + Publication b = publication(); a.setCollectedfrom(Arrays.asList(setKV("a", "open"), setKV("b", "closed"))); b.setCollectedfrom(Arrays.asList(setKV("A", "open"), setKV("b", "Open"))); @@ -49,11 +49,140 @@ public class MergeTest { assertEquals(3, a.getCollectedfrom().size()); } + @Test + public void mergePublicationDateOfAcceptanceTest_bothPresent() { + + Publication a = publication(); + Publication b = publication(); + + a.setDateofacceptance(field("2021-06-18")); + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-18", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_bothPresent_1() { + + Publication a = publication("0.8"); + Publication b = publication("0.9"); + + a.setDateofacceptance(field("2021-06-18")); + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_bothPresent_2() { + + Publication a = publication("0.9"); + Publication b = publication("0.8"); + + a.setDateofacceptance(field("2021-06-18")); + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-18", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_leftMissing() { + + Publication a = publication(); + Publication b = publication(); + + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_leftMissing_1() { + + Publication a = publication("0.9"); + Publication b = publication("0.8"); + + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_leftMissing_2() { + + Publication a = publication("0.8"); + Publication b = publication("0.9"); + + b.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_rightMissing() { + + Publication a = publication(); + Publication b = publication(); + + a.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_rightMissing_1() { + + Publication a = publication("0.8"); + Publication b = publication("0.9"); + + a.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + + @Test + public void mergePublicationDateOfAcceptanceTest_rightMissing_2() { + + Publication a = publication("0.9"); + Publication b = publication("0.8"); + + a.setDateofacceptance(field("2021-06-19")); + + a.mergeFrom(b); + + assertNotNull(a.getDateofacceptance()); + assertEquals("2021-06-19", a.getDateofacceptance().getValue()); + } + @Test public void mergePublicationSubjectTest() { - Publication a = new Publication(); - Publication b = new Publication(); + Publication a = publication(); + Publication b = publication(); a.setSubject(Arrays.asList(setSP("a", "open", "classe"), setSP("b", "open", "classe"))); b.setSubject(Arrays.asList(setSP("A", "open", "classe"), setSP("c", "open", "classe"))); @@ -148,4 +277,28 @@ public class MergeTest { s.setQualifier(q); return s; } + + private Field field(T value) { + Field f = new Field(); + f.setValue(value); + return f; + } + + private Publication publication() { + Publication p = new Publication(); + p.setDataInfo(df("0.9")); + return p; + } + + private Publication publication(String trust) { + Publication p = new Publication(); + p.setDataInfo(df(trust)); + return p; + } + + private DataInfo df(String trust) { + DataInfo d = new DataInfo(); + d.setTrust(trust); + return d; + } } From a08220a6163869fe589484734ca582e07cd8ae81 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Jun 2021 10:02:21 +0200 Subject: [PATCH 22/24] bumped pom version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ba822ef..0a31e7b 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.5.13-SNAPSHOT + 2.6.13-SNAPSHOT From 4a60d3024a53efc529182be97d6ded65f3e2b786 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Jun 2021 10:03:11 +0200 Subject: [PATCH 23/24] [maven-release-plugin] prepare release dhp-schemas-2.6.13 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 0a31e7b..1c4cfc6 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.6.13-SNAPSHOT + 2.6.13 @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - HEAD + dhp-schemas-2.6.13 This module contains common schema classes meant to be used across the dnet-hadoop submodules From 1401484f15704712ba32456ca4b44546412b1277 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Jun 2021 10:03:17 +0200 Subject: [PATCH 24/24] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1c4cfc6..ed53320 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.6.13 + 2.6.14-SNAPSHOT @@ -32,7 +32,7 @@ scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git scm:git:gitea@code-repo.d4science.org:D-Net/dhp-schemas.git https://code-repo.d4science.org/D-Net/dhp-schemas/ - dhp-schemas-2.6.13 + HEAD This module contains common schema classes meant to be used across the dnet-hadoop submodules