This commit is contained in:
Miriam Baglioni 2021-06-21 09:58:05 +02:00
commit 92750c418c
6 changed files with 260 additions and 19 deletions

13
pom.xml
View File

@ -5,7 +5,7 @@
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<packaging>jar</packaging>
<version>2.4.7-SNAPSHOT</version>
<version>2.6.14-SNAPSHOT</version>
<licenses>
<license>
@ -262,6 +262,12 @@
<version>${dhp.commons.lang.version}</version>
</dependency>
<dependency>
<groupId>com.github.sisyphsu</groupId>
<artifactId>dateparser</artifactId>
<version>1.0.7</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
@ -330,6 +336,11 @@
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>com.github.sisyphsu</groupId>
<artifactId>dateparser</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>

View File

@ -8,6 +8,7 @@ public class ModelConstants {
public static final String ORCID = "orcid";
public static final String ORCID_PENDING = "orcid_pending";
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
public static final String ORCID_DS = ORCID.toUpperCase();
public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2";
public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254";
@ -34,7 +35,7 @@ public class ModelConstants {
public static final String DNET_COUNTRY_TYPE = "dnet:countries";
public static final String DNET_REVIEW_LEVELS = "dnet:review_levels";
public static final String DNET_PROGRAMMING_LANGUAGES = "dnet:programming_languages";
public static final String DNET_EXTERNAL_REF_TYPES = "dnet:externalReference_typologies";
public static final String DNET_EXTERNAL_REFERENCE_TYPE = "dnet:externalReference_typologies";
public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";

View File

@ -7,17 +7,15 @@ import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.time.format.DateTimeParseException;
import java.util.*;
import java.util.function.Function;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
@ -308,6 +306,8 @@ public class ModelSupport {
private static final String schemeTemplate = "dnet:%s_%s_relations";
public static final String DATE_FORMAT = "yyyy-MM-dd";
private ModelSupport() {
}
@ -501,12 +501,17 @@ public class ModelSupport {
}
if (StringUtils.isNotBlank(dateA) && StringUtils.isNotBlank(dateB)) {
final Date a = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateA)));
final Date b = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateB)));
final Date a = DateParserUtils.parseDate(dateA);
final Date b = DateParserUtils.parseDate(dateB);
return a.before(b) ? dateA : dateB;
if (Objects.nonNull(a) && Objects.nonNull(b)) {
return a.before(b) ? dateA : dateB;
} else {
return null;
}
} else {
return null;
}
}
}

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
@ -256,6 +257,14 @@ public class Result extends OafEntity implements Serializable {
if (r.getLanguage() != null && compareTrust(this, r) < 0)
language = r.getLanguage();
if (Objects.nonNull(r.getDateofacceptance())) {
if (Objects.isNull(getDateofacceptance())) {
dateofacceptance = r.getDateofacceptance();
} else if (compareTrust(this, r) < 0) {
dateofacceptance = r.getDateofacceptance();
}
}
country = mergeLists(country, r.getCountry());
subject = mergeLists(subject, r.getSubject());

View File

@ -5,8 +5,58 @@ import org.apache.commons.lang3.EnumUtils;
public enum PidType {
// Result
doi, pmid, pmc, handle, arXiv, nct, pdb,
/**
* The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
*
* There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
*
* The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters
* of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be
* defined for an application by the ISO 26324 Registration Authority.
*
*
* DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
* These two components shall be separated by a full stop (period). The directory indicator shall be "10" and
* distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the
* resolution system.
*
* Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a
* unique string assigned to a registrant.
*
* DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant.
* Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number,
* or it might incorporate an identifier generated from or based on another system used by the registrant
* (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be
* specified, as in Example 1).
*
* Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
*/
doi,
/**
* PubMed Unique Identifier (PMID)
*
* This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the
* accession number for managing and disseminating records. PMIDs are not reused after records are deleted.
*
* Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions
* (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID is not displayed
* on the MEDLINE format.
*
* View the citation in abstract format in PubMed to access additional versions when available (see the article in
* the Jan-Feb 2012 NLM Technical Bulletin).
*
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
*/
pmid,
/**
* This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the
* prefix PMC.
*
* Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
*/
pmc, handle, arXiv, nct, pdb,
// Organization
openorgs, corda, corda_h2020, GRID, mag_id, urn,

View File

@ -37,8 +37,8 @@ public class MergeTest {
@Test
public void mergePublicationCollectedFromTest() {
Publication a = new Publication();
Publication b = new Publication();
Publication a = publication();
Publication b = publication();
a.setCollectedfrom(Arrays.asList(setKV("a", "open"), setKV("b", "closed")));
b.setCollectedfrom(Arrays.asList(setKV("A", "open"), setKV("b", "Open")));
@ -49,11 +49,140 @@ public class MergeTest {
assertEquals(3, a.getCollectedfrom().size());
}
@Test
public void mergePublicationDateOfAcceptanceTest_bothPresent() {
Publication a = publication();
Publication b = publication();
a.setDateofacceptance(field("2021-06-18"));
b.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-18", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_bothPresent_1() {
Publication a = publication("0.8");
Publication b = publication("0.9");
a.setDateofacceptance(field("2021-06-18"));
b.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_bothPresent_2() {
Publication a = publication("0.9");
Publication b = publication("0.8");
a.setDateofacceptance(field("2021-06-18"));
b.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-18", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_leftMissing() {
Publication a = publication();
Publication b = publication();
b.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_leftMissing_1() {
Publication a = publication("0.9");
Publication b = publication("0.8");
b.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_leftMissing_2() {
Publication a = publication("0.8");
Publication b = publication("0.9");
b.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_rightMissing() {
Publication a = publication();
Publication b = publication();
a.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_rightMissing_1() {
Publication a = publication("0.8");
Publication b = publication("0.9");
a.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationDateOfAcceptanceTest_rightMissing_2() {
Publication a = publication("0.9");
Publication b = publication("0.8");
a.setDateofacceptance(field("2021-06-19"));
a.mergeFrom(b);
assertNotNull(a.getDateofacceptance());
assertEquals("2021-06-19", a.getDateofacceptance().getValue());
}
@Test
public void mergePublicationSubjectTest() {
Publication a = new Publication();
Publication b = new Publication();
Publication a = publication();
Publication b = publication();
a.setSubject(Arrays.asList(setSP("a", "open", "classe"), setSP("b", "open", "classe")));
b.setSubject(Arrays.asList(setSP("A", "open", "classe"), setSP("c", "open", "classe")));
@ -91,13 +220,25 @@ public class MergeTest {
b = createRel(true, "2016-04-05T12:41:19.202Z");
a.mergeFrom(b);
assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate());
a = createRel(true, "2020-09-10 11:08:52");
b = createRel(true, "2021-09-10 11:08:52");
a.mergeFrom(b);
assertEquals("2020-09-10 11:08:52", a.getValidationDate());
a = createRel(true, "2021-03-16T10:32:42Z");
b = createRel(true, "2020-03-16T10:32:42Z");
a.mergeFrom(b);
assertEquals("2020-03-16T10:32:42Z", a.getValidationDate());
}
@Test
public void mergeRelationTestParseException() {
assertThrows(DateTimeParseException.class, () -> {
Relation a = createRel(true, "2016-04-05");
Relation b = createRel(true, "2016-04-05");
Relation a = createRel(true, "Once upon a time ...");
Relation b = createRel(true, "... in a far away land");
a.mergeFrom(b);
});
}
@ -136,4 +277,28 @@ public class MergeTest {
s.setQualifier(q);
return s;
}
private <T> Field<T> field(T value) {
Field<T> f = new Field<T>();
f.setValue(value);
return f;
}
private Publication publication() {
Publication p = new Publication();
p.setDataInfo(df("0.9"));
return p;
}
private Publication publication(String trust) {
Publication p = new Publication();
p.setDataInfo(df(trust));
return p;
}
private DataInfo df(String trust) {
DataInfo d = new DataInfo();
d.setTrust(trust);
return d;
}
}