From f8468c9c2296905c0f5102df78a797f4ab0ccf7c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 1 Dec 2020 20:09:35 +0100 Subject: [PATCH 1/9] added extension for new author pid (orcid_pending) --- .../src/main/java/eu/dnetlib/dhp/PropagationConstant.java | 5 ++++- .../PrepareResultOrcidAssociationStep1.java | 3 ++- .../SparkOrcidToResultFromSemRelJob.java | 5 +++-- .../dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java | 5 ++++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 1cc41c395..d38d79fec 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,7 +44,10 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - public static final String PROPAGATION_AUTHOR_PID = "ORCID"; + public static final String PROPAGATION_AUTHOR_PID_CLASSID = "orcid_pending"; + public static final String ORCID = "orcid"; + public static final String PROPAGATION_AUTHOR_PID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 869831ba2..1e445828c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -102,7 +102,8 @@ public class PrepareResultOrcidAssociationStep1 { + " FROM result " + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " - + " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp " + + " WHERE lower(MyP.qualifier.classid) = '" + ORCID + "' or " + +" lower(MyP.qualifier.classid) = '" + PROPAGATION_AUTHOR_PID_CLASSID + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 3fc127064..3e5c4d641 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -176,7 +176,7 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); + p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID_CLASSID, PROPAGATION_AUTHOR_PID_CLASSNAME)); p .setDataInfo( getDataInfo( @@ -201,7 +201,8 @@ public class SparkOrcidToResultFromSemRelJob { return false; } for (StructuredProperty pid : pids.get()) { - if (PROPAGATION_AUTHOR_PID.equals(pid.getQualifier().getClassid())) { + if (PROPAGATION_AUTHOR_PID_CLASSID.equals(pid.getQualifier().getClassid().toLowerCase()) || + ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { return true; } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java 
b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index edd2e7ba7..aeaa8a3c1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -5,6 +5,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; +import eu.dnetlib.dhp.PropagationConstant; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -166,7 +168,8 @@ public class OrcidPropagationJobTest { propagatedAuthors .filter( "id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " - + "and name = 'Vajinder' and surname = 'Kumar' and pidType = 'ORCID'") + + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" + + PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'") .count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); From a417624670aa1b1908c8e3a40ee457bf45241112 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 2 Dec 2020 10:15:26 +0100 Subject: [PATCH 2/9] tests for raw graph mapping --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 17 +++ .../dnetlib/dhp/oa/graph/raw/oaf-bologna.xml | 115 ++++++++++++++++++ .../eu/dnetlib/dhp/oa/graph/raw/textgrid.xml | 24 ++-- 3 files changed, 145 insertions(+), 11 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf-bologna.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 2d4cccdfb..46cb1a535 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -358,6 +358,23 @@ public class MappersTest { System.out.println(p.getTitle().get(0).getValue()); } + @Test + void testBologna() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.xml")); + final List list = new OafToOafMapper(vocs, false).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); + System.out.println(p.getTitle().get(0).getValue()); + assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + System.out.println(p.getTitle().get(0).getValue()); + } + private void assertValidId(final String id) { assertEquals(49, id.length()); assertEquals('|', id.charAt(2)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf-bologna.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf-bologna.xml new file mode 100644 index 000000000..296a2fe94 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf-bologna.xml @@ -0,0 +1,115 @@ + + +
+ od_________3::4f7038e665fdd3800297735f087a530c + oai:amsacta.unibo.it:6382 + + + + + + 2020-05-21T05:26:15.93Z + 2020-08-01T11:06:26.977Z + od_________3 +
+ + CONSOLE Project - Deliverable 5.1 - "Guidelines for Community of Practice (CoP) + management at local level" + Blanco-Velázquez, Francisco José + Runge, Tania + Anaya-Romero, María + 2020-05-11 + The Community of Practice (CoP) is foreseen to play a key role in boosting + innovation in the effective and long-lasting delivery of agri-environmental-climate + public goods (AECPGs). The CONSOLE CoP will be organized around practitioners + experienced in the provision of AECPGs and those interested in it and will be nourished + throughout the project lifetime. In line with the definition of Wenger1 the CoP is + defined as a group of people (the community) who share a common interest and who learn + how to perform better through regular interaction and exchange of experiences. The idea + is to set up a pan-European CoP with national and/or local (regional) sub-groups managed + by the CONSOLE partners with the aim of developing improved and novel contract solutions + in collaboration with its members. This document sets out: (a) the purpose and + objectives of the CoP in CONSOLE, (b) the setting up and management of the CoP at + European, national and local level, (c) the process for motivating individuals to + participate. The CONSOLE CoP is intended to facilitate knowledge exchange and mutual + learning, mainly through virtual contacts. Participation in the CoP is based on sharing + and reciprocity principle. A core objective of these guidelines is to ensure a sound + management and facilitation of the CoP by all CONSOLE partners in view of optimizing the + input from CoP members in the project activities. Members within a national or local + CONSOLE CoP are: 1) CONSOLE partners; 2) practitioners, mainly farmers and foresters, + who test and implement practically the contractual models, and 3) experts, that may have + punctual interventions in the CoP. 
A vibrant CoP with active involvement of its members + is crucial for the assessment and testing of improved and novel voluntary measures for + the delivery of AECPGs. For each of the CONSOLE countries one national contact person is + nominated to take over the role as national focal point for the CoP activities in his + country and to serve as facilitator of the CoP. These facilitators are responsible to + ensure participation along the various project tasks foreseen within several WPs and to + overcome potential language barriers. The national contact person may be supported by + other CONSOLE partners from his country for local activities. At local level the CoP + benefits from existing contacts of CONSOLE partners to practitioners, including the + experts interviewed for the case studies analysis within WP2. The forming/development of + a CoP requires promoting exchanges taking on board the interests and needs of the actors + involved and to animate them to share their expertise. Collaborative learning within the + CoP supported by dedicated training activities will be crucial to ensure the intended + major transition towards smarter AECPGs-related practices in Europe. These guidelines + focus on the identification of the various tasks where CoP participation is foreseen at + local (regional) level and to provide support for the identification of potential + members. In the deliverable D5.2 “Guidelines for testing the solutions catalogue by CoP + and partners” further details about the involvement of the CoP will be + provided. 
+ application/pdf + http://amsacta.unibo.it/6382/ + eng + info:eu-repo/grantAgreement/EC/H2020/817949/EU/CONtract SOLutions for Effective + and lasting delivery of agri-environmental-climate public goods by EU agriculture and + forestry/CONSOLE + info:eu-repo/semantics/altIdentifier/doi/10.6092/unibo/amsacta/6382 + info:eu-repo/semantics/reference/url/https://console-project.eu/ + Blanco-Velázquez, Francisco José ; Runge, Tania ; Anaya-Romero, María (2020) + CONSOLE Project - Deliverable 5.1 - "Guidelines for Community of Practice (CoP) + management at local level". p. 21. DOI 10.6092/unibo/amsacta/6382 + <http://doi.org/10.6092/unibo/amsacta/6382>. + AGR/01 Economia ed estimo rurale + SECS-S/01 Statistica + info:eu-repo/semantics/book + info:eu-repo/semantics/publishedVersion + 0002 + 2020-05-11 + corda__h2020::817949 + + opendoar____::3 + OPEN + + + 10.6092/unibo/amsacta/6382 + + + + + + http%3A%2F%2Famsacta.unibo.it%2Fcgi%2Fopenaire3 + oai:amsacta.unibo.it:6382 + 2020-05-13T09:27:00Z + http://www.openarchives.org/OAI/2.0/oai_dc/ + + + + false + false + 0.9 + + + + +
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml index d6970ab3e..eddbc1ec4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml @@ -1,23 +1,24 @@ - + + xmlns:dri="http://www.driver-repository.eu/namespace/dri" + xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> r3f52792889d::000051aa1f61d77d2c0b340091f8024e textgrid:q9cv.0 2020-11-17T09:34:11.128+01:00 r3f52792889d textgrid:q9cv.0 2012-01-21T13:35:20Z - 2020-11-17T09:46:21.551+01:00 + 2020-11-17T19:08:56.703+01:00 + xmlns:dri="http://www.driver-repository.eu/namespace/dri" + xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> hdl:11858/00-1734-0000-0003-7664-F @@ -46,8 +47,8 @@ 2012-01-21T13:35:20Z - - textgrid:q9cv.0 + + textgrid:q9cv.0 http://hdl.handle.net/hdl:11858/00-1734-0000-0003-7664-F @@ -83,7 +84,7 @@ hdl:11858/00-1734-0000-0003-7664-F 0021 0002 - 2012-01-01 + 2012-01-21 OPEN http://creativecommons.org/licenses/by/3.0/de/legalcode und @@ -91,7 +92,8 @@ + xmlns:dri="http://www.driver-repository.eu/namespace/dri" + xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> https%3A%2F%2Fdev.textgridlab.org%2F1.0%2Ftgoaipmh%2Foai From 2d15667b4a49b8de59683650614b5027df547da6 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 2 Dec 2020 10:16:26 +0100 Subject: [PATCH 3/9] testing XML generation from json object (case AMS ACTA) --- .../oa/provision/XmlRecordFactoryTest.java | 39 +- .../dnetlib/dhp/oa/provision/oaf-bologna.json | 379 ++++++++++++++++++ 2 files changed, 416 insertions(+), 2 deletions(-) create mode 100644 
dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/oaf-bologna.json diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 992ab26e8..619df7716 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -5,8 +5,15 @@ import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.io.StringReader; +import java.util.List; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; @@ -19,9 +26,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import org.mockito.Mock; //TODO to enable it we need to update the joined_entity.json test file -@Disabled +//@Disabled public class XmlRecordFactoryTest { private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; @@ -35,10 +43,35 @@ public class XmlRecordFactoryTest { JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class); assertNotNull(je); + Document doc = buildXml(je); + ////TODO specific test assertion on doc + } + + + + @Test + void testBologna() throws IOException, 
DocumentException { + final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json")); + Publication oaf = new ObjectMapper().readValue(json, Publication.class); + assertNotNull(oaf); + JoinedEntity je = new JoinedEntity(); + je.setEntity(oaf); + assertNotNull(je); + + Document doc = buildXml(je); + //TODO specific test assertion on doc + + System.out.println(doc.asXML()); + + + + } + + private Document buildXml(JoinedEntity je) throws DocumentException { ContextMapper contextMapper = new ContextMapper(); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + otherDsTypeId); String xml = xmlRecordFactory.build(je); @@ -49,5 +82,7 @@ public class XmlRecordFactoryTest { assertNotNull(doc); // TODO add assertions based of values extracted from the XML record + + return doc; } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/oaf-bologna.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/oaf-bologna.json new file mode 100644 index 000000000..3bb7d5b68 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/oaf-bologna.json @@ -0,0 +1,379 @@ + + { + "collectedfrom": [ + { + "key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3", + "value": "AMS Acta", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": 1606898557407, + "id": "50|od_________3::4f7038e665fdd3800297735f087a530c", + "originalId": [ + "oai:amsacta.unibo.it:6382" + ], + "pid": [ + { + "value": "10.6092/unibo/amsacta/6382", + "qualifier": { 
+ "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofcollection": "", + "dateoftransformation": "2020-08-01T11:06:26.977Z", + "extraInfo": [], + "oaiprovenance": { + "originDescription": { + "harvestDate": "2020-05-21T05:26:15.93Z", + "altered": true, + "baseURL": "http%3A%2F%2Famsacta.unibo.it%2Fcgi%2Fopenaire3", + "identifier": "oai:amsacta.unibo.it:6382", + "datestamp": "2020-05-13T09:27:00Z", + "metadataNamespace": "http://www.openarchives.org/OAI/2.0/oai_dc/" + } + }, + "measures": null, + "author": [ + { + "fullname": "Blanco-Velázquez, Francisco José", + "name": "Francisco José", + "surname": "Blanco-Velázquez", + "rank": 1, + "pid": [], + "affiliation": null + }, + { + "fullname": "Runge, Tania", + "name": "Tania", + "surname": "Runge", + "rank": 2, + "pid": [], + "affiliation": null + }, + { + "fullname": "Anaya-Romero, María", + "name": "María", + "surname": "Anaya-Romero", + "rank": 3, + "pid": [], + "affiliation": null + } + ], + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "language": { + "classid": "eng", + "classname": "English", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "country": [], + "subject": [ + { + "value": "AGR/01 Economia ed estimo rurale", + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:result_subject", + "schemename": "dnet:result_subject" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + 
"trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "SECS-S/01 Statistica", + "qualifier": { + "classid": "keyword", + "classname": "keyword", + "schemeid": "dnet:result_subject", + "schemename": "dnet:result_subject" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "title": [ + { + "value": "CONSOLE Project - Deliverable 5.1 - \"Guidelines for Community of Practice (CoP)\n management at local level\"", + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "relevantdate": [], + "description": [ + { + "value": "The Community of Practice (CoP) is foreseen to play a key role in boosting\n innovation in the effective and long-lasting delivery of agri-environmental-climate\n public goods (AECPGs). The CONSOLE CoP will be organized around practitioners\n experienced in the provision of AECPGs and those interested in it and will be nourished\n throughout the project lifetime. 
In line with the definition of Wenger1 the CoP is\n defined as a group of people (the community) who share a common interest and who learn\n how to perform better through regular interaction and exchange of experiences. The idea\n is to set up a pan-European CoP with national and/or local (regional) sub-groups managed\n by the CONSOLE partners with the aim of developing improved and novel contract solutions\n in collaboration with its members. This document sets out: (a) the purpose and\n objectives of the CoP in CONSOLE, (b) the setting up and management of the CoP at\n European, national and local level, (c) the process for motivating individuals to\n participate. The CONSOLE CoP is intended to facilitate knowledge exchange and mutual\n learning, mainly through virtual contacts. Participation in the CoP is based on sharing\n and reciprocity principle. A core objective of these guidelines is to ensure a sound\n management and facilitation of the CoP by all CONSOLE partners in view of optimizing the\n input from CoP members in the project activities. Members within a national or local\n CONSOLE CoP are: 1) CONSOLE partners; 2) practitioners, mainly farmers and foresters,\n who test and implement practically the contractual models, and 3) experts, that may have\n punctual interventions in the CoP. A vibrant CoP with active involvement of its members\n is crucial for the assessment and testing of improved and novel voluntary measures for\n the delivery of AECPGs. For each of the CONSOLE countries one national contact person is\n nominated to take over the role as national focal point for the CoP activities in his\n country and to serve as facilitator of the CoP. These facilitators are responsible to\n ensure participation along the various project tasks foreseen within several WPs and to\n overcome potential language barriers. The national contact person may be supported by\n other CONSOLE partners from his country for local activities. 
At local level the CoP\n benefits from existing contacts of CONSOLE partners to practitioners, including the\n experts interviewed for the case studies analysis within WP2. The forming/development of\n a CoP requires promoting exchanges taking on board the interests and needs of the actors\n involved and to animate them to share their expertise. Collaborative learning within the\n CoP supported by dedicated training activities will be crucial to ensure the intended\n major transition towards smarter AECPGs-related practices in Europe. These guidelines\n focus on the identification of the various tasks where CoP participation is foreseen at\n local (regional) level and to provide support for the identification of potential\n members. In the deliverable D5.2 “Guidelines for testing the solutions catalogue by CoP\n and partners” further details about the involvement of the CoP will be\n provided.", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "2020-05-11", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "publisher": null, + "embargoenddate": null, + "source": [ + { + "value": "Blanco-Velázquez, Francisco José ; Runge, Tania ; Anaya-Romero, María (2020)\n CONSOLE Project - Deliverable 5.1 - \"Guidelines for Community of Practice (CoP)\n management at local level\". p. 21. 
DOI 10.6092/unibo/amsacta/6382\n .", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "fulltext": [], + "format": [ + { + "value": "application/pdf", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "contributor": [], + "resourcetype": null, + "coverage": [], + "bestaccessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "context": [], + "externalReference": [], + "instance": [ + { + "license": null, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "instancetype": { + "classid": "0002", + "classname": "Book", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "hostedby": { + "key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3", + "value": "AMS Acta", + "dataInfo": null + }, + "url": [ + "http://amsacta.unibo.it/6382/" + ], + "distributionlocation": "", + "collectedfrom": { + "key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3", + "value": "AMS Acta", + "dataInfo": null + }, + "dateofacceptance": { + "value": "2020-05-11", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": 
"sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "processingchargeamount": null, + "processingchargecurrency": null, + "refereed": { + "classid": "UNKNOWN", + "classname": "Unknown", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + } + } + ], + "journal": null + }, + { + "collectedfrom": [ + { + "key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3", + "value": "AMS Acta", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": 1606898557407, + "relType": "resultProject", + "subRelType": "outcome", + "relClass": "isProducedBy", + "source": "50|od_________3::4f7038e665fdd3800297735f087a530c", + "target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d", + "validated": null, + "validationDate": null, + "properties": [] + }, + { + "collectedfrom": [ + { + "key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3", + "value": "AMS Acta", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": "", + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": 1606898557407, + "relType": "resultProject", + "subRelType": "outcome", + "relClass": "produces", + "source": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d", + "target": "50|od_________3::4f7038e665fdd3800297735f087a530c", + "validated": 
null, + "validationDate": null, + "properties": [] + } + From 57f448b7a423030f7d745e80a6fc7100ed480e57 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 2 Dec 2020 10:44:05 +0100 Subject: [PATCH 4/9] graph cleaning workflow separate orcid_pending from orcid, depending on the author pid provenance --- .../dhp/schema/common/ModelConstants.java | 3 +++ .../dhp/oa/graph/clean/CleaningFunctions.java | 25 ++++++++++++++++++- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 22 ++++++++++++++++ .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 1 + 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index d759f0d55..0b4d29c8e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -7,6 +7,9 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; public class ModelConstants { + public static final String ORCID = "orcid"; + public static final String ORCID_PENDING = "orcid_pending"; + public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource"; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java index 5155d0242..945f717bb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java @@ -189,6 +189,14 @@ public class CleaningFunctions { author.setRank(i++); } } + + final Set collectedFrom = Optional + 
.ofNullable(r.getCollectedfrom()) + .map(c -> c.stream() + .map(KeyValue::getKey) + .collect(Collectors.toCollection(HashSet::new))) + .orElse(new HashSet<>()); + for (Author a : r.getAuthor()) { if (Objects.isNull(a.getPid())) { a.setPid(Lists.newArrayList()); @@ -201,13 +209,28 @@ public class CleaningFunctions { .filter(p -> Objects.nonNull(p.getQualifier())) .filter(p -> StringUtils.isNotBlank(p.getValue())) .map(p -> { + // hack to distinguish orcid from orcid_pending + String pidProvenance = Optional + .ofNullable(p.getDataInfo()) + .map(d -> Optional + .ofNullable(d.getProvenanceaction()) + .map(Qualifier::getClassid) + .orElse("")) + .orElse(""); + if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) { + p.getQualifier().setClassid(ModelConstants.ORCID); + } else { + p.getQualifier().setClassid(ModelConstants.ORCID_PENDING); + } p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, "")); return p; }) .collect( Collectors .toMap( - StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1, + p -> p.getQualifier().getClassid() + p.getValue(), + Function.identity(), + (p1, p2) -> p1, LinkedHashMap::new)) .values() .stream() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 5c903cd0e..e746d236e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -49,6 +49,28 @@ "schemename": "dnet:pid_types" }, "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": 
"dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" } ], "rank": 1, diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 93cc00eca..67c070d1d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1031,6 +1031,7 @@ dnet:pid_types @=@ dnet:pid_types @=@ jprn @=@ JPRN Identifier dnet:pid_types @=@ dnet:pid_types @=@ mag_id @=@ Microsoft Academic Graph Identifier dnet:pid_types @=@ dnet:pid_types @=@ oai @=@ Open Archives Initiative dnet:pid_types @=@ dnet:pid_types @=@ orcid @=@ Open Researcher and Contributor ID +dnet:pid_types @=@ dnet:pid_types @=@ orcid_pending @=@ Open Researcher and Contributor ID dnet:pid_types @=@ dnet:pid_types @=@ PANGAEA @=@ PANGAEA dnet:pid_types @=@ dnet:pid_types @=@ epo_nr_epodoc @=@ Patent application number in EPODOC format dnet:pid_types @=@ dnet:pid_types @=@ UNKNOWN @=@ UNKNOWN From 51c582c08ccceaf428aa5ff4998afa4a2cead26a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Dec 2020 11:12:54 +0100 Subject: [PATCH 5/9] added orcid class name among the constants set --- .../main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index 0b4d29c8e..1efa86586 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ 
-9,6 +9,7 @@ public class ModelConstants { public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; + public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; From cd285e98bc255e153dac982665e08698af39f10f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Dec 2020 11:13:23 +0100 Subject: [PATCH 6/9] using the constants defined in the ModelConstants class --- .../src/main/java/eu/dnetlib/dhp/PropagationConstant.java | 4 +--- .../PrepareResultOrcidAssociationStep1.java | 5 +++-- .../SparkOrcidToResultFromSemRelJob.java | 7 ++++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index d38d79fec..360cf5ffa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,9 +44,7 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - public static final String PROPAGATION_AUTHOR_PID_CLASSID = "orcid_pending"; - public static final String ORCID = "orcid"; - public static final String PROPAGATION_AUTHOR_PID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 1e445828c..99ae1ee2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -102,8 +103,8 @@ public class PrepareResultOrcidAssociationStep1 { + " FROM result " + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " - + " WHERE lower(MyP.qualifier.classid) = '" + ORCID + "' or " - +" lower(MyP.qalifier.classid) = '" + PROPAGATION_AUTHOR_PID_CLASSID + "') tmp " + + " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or " + +" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 3e5c4d641..55f18007d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; +import 
eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -176,7 +177,7 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID_CLASSID, PROPAGATION_AUTHOR_PID_CLASSNAME)); + p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME)); p .setDataInfo( getDataInfo( @@ -201,8 +202,8 @@ public class SparkOrcidToResultFromSemRelJob { return false; } for (StructuredProperty pid : pids.get()) { - if (PROPAGATION_AUTHOR_PID_CLASSID.equals(pid.getQualifier().getClassid().toLowerCase()) || - ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { + if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) || + ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { return true; } } From 74242e450e6d288db3275ce8709e4ae0f2815051 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 2 Dec 2020 11:23:35 +0100 Subject: [PATCH 7/9] using constants from ModelConstants --- .../orcidtoresultfromsemrel/OrcidPropagationJobTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index aeaa8a3c1..ba50f9bc5 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -5,8 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import 
com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; -import eu.dnetlib.dhp.PropagationConstant; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -21,8 +19,11 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Dataset; public class OrcidPropagationJobTest { @@ -169,7 +170,7 @@ public class OrcidPropagationJobTest { .filter( "id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" + - PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'") + ModelConstants.ORCID_PENDING + "'") .count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); From cfb55effd9f26d9143a0ccaea01906bd2b87c7ab Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 2 Dec 2020 11:23:49 +0100 Subject: [PATCH 8/9] code formatting --- .../orcidnodoi/oaf/PublicationToOaf.java | 12 ++++---- .../eu/dnetlib/dhp/PropagationConstant.java | 3 -- .../PrepareResultOrcidAssociationStep1.java | 4 +-- .../SparkOrcidToResultFromSemRelJob.java | 4 +-- .../dhp/oa/graph/clean/CleaningFunctions.java | 29 ++++++++++--------- .../oa/provision/XmlRecordFactoryTest.java | 24 +++++++-------- 6 files changed, 36 insertions(+), 40 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 18fecc6c2..1aed66dfd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -531,12 +531,12 @@ public class PublicationToOaf implements Serializable { dataInfo.setInferred(false); dataInfo.setTrust("0.9"); dataInfo - .setProvenanceaction( - mapQualifier( - "sysimport:crosswalk:entityregistry", - "Harvested", - "dnet:provenanceActions", - "dnet:provenanceActions")); + .setProvenanceaction( + mapQualifier( + "sysimport:crosswalk:entityregistry", + "Harvested", + "dnet:provenanceActions", + "dnet:provenanceActions")); sp.setDataInfo(dataInfo); return sp; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 360cf5ffa..692605b03 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,9 +44,6 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - - - public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 99ae1ee2d..dd8342980 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; @@ -104,7 +104,7 @@ public class PrepareResultOrcidAssociationStep1 { + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " + " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or " - +" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + + " lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 55f18007d..9f08fe580 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import 
org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -24,6 +23,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @@ -203,7 +203,7 @@ public class SparkOrcidToResultFromSemRelJob { } for (StructuredProperty pid : pids.get()) { if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) || - ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { + ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { return true; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java index 945f717bb..2a6fd3a1d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java @@ -191,11 +191,13 @@ public class CleaningFunctions { } final Set collectedFrom = Optional - .ofNullable(r.getCollectedfrom()) - .map(c -> c.stream() - .map(KeyValue::getKey) - .collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet<>()); + .ofNullable(r.getCollectedfrom()) + .map( + c -> c + .stream() + .map(KeyValue::getKey) + .collect(Collectors.toCollection(HashSet::new))) + .orElse(new HashSet<>()); for (Author a : r.getAuthor()) { if (Objects.isNull(a.getPid())) { @@ -211,12 +213,13 @@ public class CleaningFunctions { .map(p -> { // hack to distinguish orcid from orcid_pending String pidProvenance = Optional - .ofNullable(p.getDataInfo()) - .map(d -> Optional - .ofNullable(d.getProvenanceaction()) - 
.map(Qualifier::getClassid) - .orElse("")) - .orElse(""); + .ofNullable(p.getDataInfo()) + .map( + d -> Optional + .ofNullable(d.getProvenanceaction()) + .map(Qualifier::getClassid) + .orElse("")) + .orElse(""); if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) { p.getQualifier().setClassid(ModelConstants.ORCID); } else { @@ -229,8 +232,8 @@ public class CleaningFunctions { Collectors .toMap( p -> p.getQualifier().getClassid() + p.getValue(), - Function.identity(), - (p1, p2) -> p1, + Function.identity(), + (p1, p2) -> p1, LinkedHashMap::new)) .values() .stream() diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 619df7716..e84f97836 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,11 +7,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -20,13 +15,18 @@ import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.mockito.Mock; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; -import org.mockito.Mock; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import 
eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; //TODO to enable it we need to update the joined_entity.json test file //@Disabled @@ -44,34 +44,30 @@ public class XmlRecordFactoryTest { assertNotNull(je); Document doc = buildXml(je); - ////TODO specific test assertion on doc + //// TODO specific test assertion on doc } - - @Test void testBologna() throws IOException, DocumentException { final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json")); - Publication oaf = new ObjectMapper().readValue(json, Publication.class); + Publication oaf = new ObjectMapper().readValue(json, Publication.class); assertNotNull(oaf); JoinedEntity je = new JoinedEntity(); je.setEntity(oaf); assertNotNull(je); Document doc = buildXml(je); - //TODO specific test assertion on doc + // TODO specific test assertion on doc System.out.println(doc.asXML()); - - } private Document buildXml(JoinedEntity je) throws DocumentException { ContextMapper contextMapper = new ContextMapper(); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + otherDsTypeId); String xml = xmlRecordFactory.build(je); From 6ba8037cc7926f0fd8b00e31ac6ce39ef18b1f7f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 2 Dec 2020 11:34:46 +0100 Subject: [PATCH 9/9] fixed failure to test due to changing of input --- .../java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index a3bb2a4f4..4568e23a5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ 
b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -158,7 +158,7 @@ class CrossrefMappingTest { rels.foreach(s => logger.info(s.getTarget)) - assertEquals(rels.size, 3 ) + assertEquals(rels.size, 6 ) }