From 7fe2433137854c0354782bb342fa4f7ad380003b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Dec 2020 12:42:55 +0100 Subject: [PATCH 1/8] excluded transitive older junit dependencies, they can compromise the unit test executions --- dhp-workflows/dhp-graph-provision/pom.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 1547056b94..c8fab5207c 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -59,6 +59,10 @@ solr-test-framework test + + junit + junit + com.carrotsearch * @@ -140,6 +144,12 @@ org.apache.zookeeper zookeeper + + + junit + junit + + From ce7a319e01654d5893488d8542e945219a8816e8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Dec 2020 12:44:17 +0100 Subject: [PATCH 2/8] using the correct assertion import --- .../eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index 33def91b39..f57b8dcafd 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -6,15 +6,13 @@ import org.apache.solr.client.solrj.response.UpdateResponse; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import junit.framework.Assert; - public class SolrAdminApplicationTest extends SolrTest { @Test public void testPing() throws Exception { SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); log.info("pingResponse: '{}'", pingResponse.getStatus()); - Assert.assertTrue(pingResponse.getStatus() == 0); + Assertions.assertTrue(pingResponse.getStatus() == 0); } @Test From 61cd129deda31bb47a6db856573dc502d9f8f3cb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 11 Dec 2020 12:44:53 +0100 Subject: [PATCH 3/8] XML serialisation test --- .../oa/provision/XmlRecordFactoryTest.java | 18 +- .../dnetlib/dhp/oa/provision/publication.json | 827 ++++++++++++++++++ 2 files changed, 836 insertions(+), 9 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 992ab26e81..0f1912cc32 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -19,9 +19,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Publication; -//TODO to enable it we need to update the joined_entity.json test file -@Disabled public class XmlRecordFactoryTest { private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; @@ -29,18 +28,15 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactory() throws IOException, DocumentException { - String json = IOUtils.toString(getClass().getResourceAsStream("joined_entity.json")); - - assertNotNull(json); - JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class); - assertNotNull(je); - ContextMapper contextMapper = new ContextMapper(); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId); - String xml = xmlRecordFactory.build(je); + Publication p = new ObjectMapper() + .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); + + String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); assertNotNull(xml); @@ -48,6 +44,10 @@ public class XmlRecordFactoryTest { assertNotNull(doc); + // System.out.println(doc.asXML()); + + Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid")); + Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); // TODO add assertions based of values extracted from the XML record } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json new file mode 100644 index 0000000000..b6b183b154 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -0,0 +1,827 @@ +{ + "author": [ + { + "affiliation": [], + "fullname": "Lee, Jaehyun", + "name": "Jaehyun", + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "orcid", + "classname": "Open Researcher and Contributor ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "orcid_pending", + "classname": "Open Researcher and Contributor ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } + ], + "rank": 1, + "surname": "Lee" + }, + { + "affiliation": [], + "fullname": "Berrada, Salim", + "name": "Salim", + "pid": [], + "rank": 2, + "surname": "Berrada" + }, + { + "affiliation": [], + "fullname": "Adamu-Lema, Fikru", + "name": "Fikru", + "pid": [], + "rank": 3, + "surname": "Adamu-Lema" + }, + { + "affiliation": [], + "fullname": "Nagy, Nicole", + "name": "Nicole", + "pid": [], + "rank": 4, + "surname": "Nagy" + }, + { + "affiliation": [], + "fullname": "Georgiev, Vihar P.", + "name": "Vihar P.", + "pid": [], + "rank": 5, + "surname": "Georgiev" + }, + { + "affiliation": [], + "fullname": "Sadi, Toufik", + "name": "Toufik", + "pid": [], + "rank": 6, + "surname": "Sadi" + }, + { + "affiliation": [], + "fullname": "Liang, Jie", + "name": "Jie", + "pid": [], + "rank": 7, + "surname": "Liang" + }, + { + "affiliation": [], + "fullname": "Ramos, Raphael", + "name": "Raphael", + "pid": [], + "rank": 8, + "surname": "Ramos" + }, + { + "affiliation": [], + "fullname": "Carrillo-Nunez, Hamilton", + "name": "Hamilton", + "pid": [], + "rank": 9, + "surname": "Carrillo-Nunez" + }, + { + "affiliation": [], + "fullname": "Kalita, Dipankar", + "name": "Dipankar", + "pid": [], + "rank": 10, + "surname": "Kalita" + }, + { + "affiliation": [], + "fullname": "Lilienthal, Katharina", + "name": "Katharina", + "pid": [], + "rank": 11, + "surname": "Lilienthal" + }, + { + "affiliation": [], + "fullname": "Wislicenus, Marcus", + "name": "Marcus", + "pid": [], + "rank": 12, + "surname": "Wislicenus" + }, + { + "affiliation": [], + "fullname": "Pandey, Reeturaj", + "name": "Reeturaj", + "pid": [], + "rank": 13, + "surname": "Pandey" + }, + { + "affiliation": [], + "fullname": "Chen, Bingan", + "name": "Bingan", + "pid": [], + "rank": 14, + "surname": "Chen" + }, + { + "affiliation": [], + "fullname": "Teo, Kenneth B.K.", + "name": "Kenneth B. K.", + "pid": [], + "rank": 15, + "surname": "Teo" + }, + { + "affiliation": [], + "fullname": "Goncalves, Goncalo", + "name": "Goncalo", + "pid": [], + "rank": 16, + "surname": "Goncalves" + }, + { + "affiliation": [], + "fullname": "Okuno, Hanako", + "name": "Hanako", + "pid": [], + "rank": 17, + "surname": "Okuno" + }, + { + "affiliation": [], + "fullname": "Uhlig, Benjamin", + "name": "Benjamin", + "pid": [], + "rank": 18, + "surname": "Uhlig" + }, + { + "affiliation": [], + "fullname": "Todri-Sanial, Aida", + "name": "Aida", + "pid": [], + "rank": 19, + "surname": "Todri-Sanial" + }, + { + "affiliation": [], + "fullname": "Dijon", + "name": "", + "pid": [], + "rank": 20, + "surname": "" + }, + { + "affiliation": [], + "fullname": "Jean", + "name": "", + "pid": [], + "rank": 21, + "surname": "" + } + ], + "collectedfrom": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + } + ], + "context": [], + "contributor": [], + "country": [], + "coverage": [], + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "dateofcollection": "2020-01-27T11:32:33.729Z", + "dateoftransformation": "2020-01-27T12:03:59.662Z", + "description": [], + "embargoenddate": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "extraInfo": [], + "format": [], + "fulltext": [], + "id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c", + "instance": [ + { + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "collectedfrom": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "dateofacceptance": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "2018-01-01" + }, + "distributionlocation": "", + "hostedby": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747", + "value": "VIRTA" + }, + "instancetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "license": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "url": [ + "http://juuli.fi/Record/0331473718", + "http://dx.doi.org/10.1109/TED.2018.2853550" + ] + } + ], + "journal": { + "conferencedate": "", + "conferenceplace": "", + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "edition": "", + "ep": "3892", + "iss": "9", + "issnLinking": "", + "issnOnline": "", + "issnPrinted": "0018-9383", + "name": "IEEE Transactions on Electron Devices", + "sp": "3884", + "vol": "65" + }, + "language": { + "classid": "en", + "classname": "en", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "lastupdatetimestamp": 0, + "originalId": [ + "0331473718", + "10.1109/TED.2018.2853550", + "http://juuli.fi/Record/0331473718" + ], + "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "10.1109/TED.2018.2853550" + } + ], + "publisher": { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "value": "" + }, + "relevantdate": [], + "resourcetype": { + "classid": "0001", + "classname": "Article", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "source": [], + "subject": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "http://finto.fi/okm-tieteenala/en/", + "classname": "finto", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "ta114" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Conductivity" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Contacts" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Cu-carbon nanotubes (CNT) composites" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "density functional theory (DFT)" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Discrete Fourier transforms" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Electromigration" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "electromigration (EM)" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "electrothermal" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "interconnects" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Lattices" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "multiscale simulation" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Resistance" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "self-heating." + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "value": "Thermal conductivity" + } + ], + "title": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "trust": "" + }, + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "value": "Understanding Electromigration in Cu-CNT Composite Interconnects A Multiscale Electrothermal Simulation Study" + } + ] +} \ No newline at end of file From 1506f49052655964bfbf8390c10361f92953b245 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 14 Dec 2020 11:14:03 +0100 Subject: [PATCH 4/8] Xml record serialization for author PIDs: 1) only one value per PID type is allowed; 2) orcid prevails over orcid_pending --- .../utils/AuthorPidTypeComparator.java | 52 +++++++++++++++++++ .../oa/provision/utils/XmlRecordFactory.java | 12 +++++ .../oa/provision/XmlRecordFactoryTest.java | 14 +++-- .../dnetlib/dhp/oa/provision/publication.json | 49 ++++++++--------- 4 files changed, 96 insertions(+), 31 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java new file mode 100644 index 0000000000..7391569ed9 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.provision.utils; + +import java.util.Comparator; +import java.util.Optional; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class AuthorPidTypeComparator implements Comparator { + + @Override + public int compare(StructuredProperty left, StructuredProperty right) { + + String lClass = Optional + .ofNullable(left) + .map(StructuredProperty::getQualifier) + .map(Qualifier::getClassid) + .orElse(null); + + String rClass = Optional + .ofNullable(right) + .map(StructuredProperty::getQualifier) + .map(Qualifier::getClassid) + .orElse(null); + + if (lClass == null && rClass == null) + return 0; + if (lClass == null) + return 1; + if (rClass == null) + return -1; + + if (lClass.equals(rClass)) + return 0; + + if (lClass.equals(ModelConstants.ORCID)) + return -1; + if (rClass.equals(ModelConstants.ORCID)) + return 1; + + if (lClass.equals(ModelConstants.ORCID_PENDING)) + return -1; + if (rClass.equals(ModelConstants.ORCID_PENDING)) + return 1; + + return 0; + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index eba7362287..9f16e99d8a 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -254,6 +254,18 @@ public class XmlRecordFactory implements Serializable { p -> p, (p1, p2) -> p1)) .values() + .stream() + .collect( + Collectors + .groupingBy( + p -> p.getValue(), + Collectors + .mapping( + p -> p, + Collectors.minBy(new AuthorPidTypeComparator())))) + .values() + .stream() + .map(op -> op.get()) .forEach( sp -> { String pidType = getAuthorPidType(sp.getQualifier().getClassid()); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 0f1912cc32..8ae8a55c39 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -14,6 +14,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; @@ -25,6 +26,9 @@ public class XmlRecordFactoryTest { private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; + private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + @Test public void testXMLRecordFactory() throws IOException, DocumentException { @@ -33,7 +37,7 @@ public class XmlRecordFactoryTest { XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId); - Publication p = new ObjectMapper() + Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); @@ -44,10 +48,14 @@ public class XmlRecordFactoryTest { assertNotNull(doc); - // System.out.println(doc.asXML()); + System.out.println(doc.asXML()); - Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid")); + Assertions.assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); + + Assertions.assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid")); + Assertions.assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending")); + // TODO add assertions based of values extracted from the XML record } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index b6b183b154..ea7a300513 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -6,41 +6,15 @@ "name": "Jaehyun", "pid": [ { - "dataInfo": { - "deletedbyinference": false, - "inferenceprovenance": "", - "inferred": false, - "invisible": false, - "provenanceaction": { - "classid": "", - "classname": "", - "schemeid": "", - "schemename": "" - }, - "trust": "" - }, "qualifier": { "classid": "orcid", "classname": "Open Researcher and Contributor ID", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types" }, - "value": "0000-0001-9613-6639" + "value": "0000-0001-9613-6638" }, { - "dataInfo": { - "deletedbyinference": false, - "inferenceprovenance": "", - "inferred": false, - "invisible": false, - "provenanceaction": { - "classid": "", - "classname": "", - "schemeid": "", - "schemename": "" - }, - "trust": "" - }, "qualifier": { "classid": "orcid_pending", "classname": "Open Researcher and Contributor ID", @@ -57,7 +31,26 @@ "affiliation": [], "fullname": "Berrada, Salim", "name": "Salim", - "pid": [], + "pid": [ + { + "qualifier": { + "classid": "orcid", + "classname": "Open Researcher and Contributor ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-9956" + }, + { + "qualifier": { + "classid": "orcid_pending", + "classname": "Open Researcher and Contributor ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-9956" + } + ], "rank": 2, "surname": "Berrada" }, From 152916890f06763d1e562d54c81ab2cd75def7ac Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 14 Dec 2020 14:40:05 +0100 Subject: [PATCH 5/8] renamed test name --- .../resulttocommunityfromsemrel/ResultToCommunityJobTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index a8e1ab8414..7709e00a85 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -24,7 +24,6 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest; import eu.dnetlib.dhp.schema.oaf.Dataset; public class ResultToCommunityJobTest { @@ -66,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void test1() throws Exception { + public void testSparkResultToCommunityThroughSemRelJob() throws Exception { SparkResultToCommunityThroughSemRelJob .main( new String[] { From 7d325e2c5719a5ced2595229f4a8cb27cb276866 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 14 Dec 2020 14:40:54 +0100 Subject: [PATCH 6/8] using actual result subclasses instead of their parent class --- .../SparkResultToCommunityFromOrganizationJob.java | 2 +- .../SparkResultToCommunityThroughSemRelJob.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 66297e1779..60ad438598 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -108,7 +108,7 @@ public class SparkResultToCommunityFromOrganizationJob { .stream() .map(con -> con.getId()) .collect(Collectors.toList()); - Result res = new Result(); + R res = (R) ret.getClass().newInstance(); res.setId(ret.getId()); List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 0c613d1b4a..5ac1176931 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -130,7 +130,7 @@ public class SparkResultToCommunityThroughSemRelJob { }) .filter(Objects::nonNull) .collect(Collectors.toList()); - Result r = new Result(); + R r = (R) ret.getClass().newInstance(); r.setId(ret.getId()); r.setContext(contextList); ret.mergeFrom(r); From e8ef8c63d458fc0545bb7df79edd4d0255fe7e1a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 14 Dec 2020 15:04:44 +0100 Subject: [PATCH 7/8] delegate merging of OafEntity.dataInfo to the implementation of subclasses --- dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java | 2 -- .../src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java | 2 -- 2 files changed, 4 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java index f4f5baa7bb..494123fdf5 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java @@ -62,8 +62,6 @@ public abstract class Oaf implements Serializable { .distinct() // relies on KeyValue.equals .collect(Collectors.toList())); - mergeOAFDataInfo(o); - setLastupdatetimestamp( Math .max( diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java index b698c957d0..4be4d5d30c 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java @@ -351,8 +351,6 @@ public class Project extends OafEntity implements Serializable { ? p.getFundedamount() : fundedamount; - // programme = mergeLists(programme, p.getProgramme()); - h2020classification = mergeLists(h2020classification, p.getH2020classification()); mergeOAFDataInfo(e); From b6f08ce226971f36df418c2b3edb93cdf01ea358 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 14 Dec 2020 15:07:31 +0100 Subject: [PATCH 8/8] re-adding the old junit:junit dep as solr-test-framework needs it --- dhp-workflows/dhp-graph-provision/pom.xml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index c8fab5207c..0d44d8e5eb 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -54,15 +54,18 @@ spark-solr + + + junit + junit + 4.12 + test + org.apache.solr solr-test-framework test - - junit - junit - com.carrotsearch *