Merge pull request 'validatedLinksToProjects' (#93) from validatedLinksToProjects into master

LGTM
This commit is contained in:
Claudio Atzori 2021-02-10 12:32:35 +01:00
commit 73393d3c4d
12 changed files with 276 additions and 6 deletions

View File

@ -334,6 +334,7 @@ public class MappersTest {
assertValidId(p.getCollectedfrom().get(0).getKey());
System.out.println(p.getTitle().get(0).getValue());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
}
@Test

View File

@ -46,6 +46,7 @@
<oaf:collectedFrom id="openaire____::crossref" name="Crossref"/>
<oaf:identifier identifierType="doi">10.1080/23744235.2020.1774644</oaf:identifier>
<oaf:journal eissn="2374-4243" ep="3" iss="" issn="2374-4235" sp="1" vol="">Infectious Diseases</oaf:journal>
<oaf:projectid validationDate="2020-12-07T11:15:59.627Z">corda__h2020::814530</oaf:projectid>
</metadata>
<about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">

View File

@ -8,7 +8,6 @@ import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -28,6 +27,7 @@ import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;

View File

@ -14,7 +14,8 @@ public class GraphMappingUtils {
public static final String SEPARATOR = "_";
public static Set<String> authorPidTypes = Sets.newHashSet(
public static Set<String> authorPidTypes = Sets
.newHashSet(
ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier");
public static String removePrefix(final String s) {

View File

@ -73,7 +73,9 @@ public class TemplateFactory {
final Collection<String> fields,
final String semanticclass,
final String semantischeme,
final DataInfo info) {
final DataInfo info,
final boolean validated,
final String validationDate) {
return getTemplate(resources.getRel())
.add("type", type)
.add("objIdentifier", escapeXml(removePrefix(objIdentifier)))
@ -86,6 +88,8 @@ public class TemplateFactory {
.add(
"provenanceaction",
info.getProvenanceaction() != null ? info.getProvenanceaction().getClassid() : "")
.add("validated", validated)
.add("validationdate", validationDate)
.render();
}

View File

@ -1096,7 +1096,8 @@ public class XmlRecordFactory implements Serializable {
final HashSet<String> fields = Sets.newHashSet(mapFields(link, contexts));
return templateFactory
.getRel(
targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo());
targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(),
rel.getValidationDate());
}
private List<String> listChildren(

View File

@ -1,4 +1,5 @@
<rel inferred="$inferred$" trust="$trust$" inferenceprovenance="$inferenceprovenance$" provenanceaction="$provenanceaction$">
<to class="$class$" scheme="$scheme$" type="$type$">$objIdentifier$</to>
$if(validated)$<validated date="$validationdate$"/>$else$$endif$
<to class="$class$" scheme="$scheme$" type="$type$">$objIdentifier$</to>
$metadata:{ it | $it$ }$
</rel>

View File

@ -5,22 +5,27 @@ import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
public class XmlRecordFactoryTest {
@ -58,4 +63,67 @@ public class XmlRecordFactoryTest {
// TODO add assertions based of values extracted from the XML record
}
/**
 * Builds the XML record of a publication linked to a project through the relation stored in
 * {@code relToValidatedProject.json} and checks that the serialized link exposes the validation date.
 *
 * @throws IOException if one of the JSON fixtures cannot be read or parsed
 * @throws DocumentException if the generated record is not well-formed XML
 */
@Test
public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException {
    final ContextMapper contextMapper = new ContextMapper();
    final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(
        contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId);

    // The fixtures are handed to Jackson as streams: this avoids the deprecated
    // IOUtils.toString(InputStream), which decodes with the platform default charset.
    final Publication p = OBJECT_MAPPER
        .readValue(getClass().getResourceAsStream("publication.json"), Publication.class);
    final Project pj = OBJECT_MAPPER
        .readValue(getClass().getResourceAsStream("project.json"), Project.class);
    final Relation rel = OBJECT_MAPPER
        .readValue(getClass().getResourceAsStream("relToValidatedProject.json"), Relation.class);

    final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
    final List<RelatedEntityWrapper> links = Lists.newArrayList();
    links.add(new RelatedEntityWrapper(rel, relatedProject));

    // Parameterized instead of the raw JoinedEntity type used previously.
    final JoinedEntity<Publication> je = new JoinedEntity<>(p);
    je.setLinks(links);

    final String xml = xmlRecordFactory.build(je);
    assertNotNull(xml);

    final Document doc = new SAXReader().read(new StringReader(xml));
    assertNotNull(doc);

    System.out.println(doc.asXML());

    // The expected date is the validationDate the relation fixture is expected to carry.
    Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date"));
}
/**
 * Builds the XML record of a publication linked to a project through the relation stored in
 * {@code relToProject.json} and checks that the serialized link carries no {@code validated} element.
 *
 * @throws IOException if one of the JSON fixtures cannot be read or parsed
 * @throws DocumentException if the generated record is not well-formed XML
 */
@Test
public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException {
    final ContextMapper contextMapper = new ContextMapper();
    final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(
        contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId);

    // The fixtures are handed to Jackson as streams: this avoids the deprecated
    // IOUtils.toString(InputStream), which decodes with the platform default charset.
    final Publication p = OBJECT_MAPPER
        .readValue(getClass().getResourceAsStream("publication.json"), Publication.class);
    final Project pj = OBJECT_MAPPER
        .readValue(getClass().getResourceAsStream("project.json"), Project.class);
    final Relation rel = OBJECT_MAPPER
        .readValue(getClass().getResourceAsStream("relToProject.json"), Relation.class);

    final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
    final List<RelatedEntityWrapper> links = Lists.newArrayList();
    links.add(new RelatedEntityWrapper(rel, relatedProject));

    // Parameterized instead of the raw JoinedEntity type used previously.
    final JoinedEntity<Publication> je = new JoinedEntity<>(p);
    je.setLinks(links);

    final String xml = xmlRecordFactory.build(je);
    assertNotNull(xml);

    final Document doc = new SAXReader().read(new StringReader(xml));
    assertNotNull(doc);

    System.out.println(doc.asXML());

    // Guard against a vacuous pass: the link itself must have been rendered.
    assertNotNull(doc.selectSingleNode("//rel"), "the relation to the project should be serialized");

    // Comparing valueOf("//rel/validated") with "" cannot detect an unwanted element, because an
    // attribute-only <validated date="..."/> also has empty text; assert that the node is absent.
    assertNull(
        doc.selectSingleNode("//rel/validated"),
        "a non-validated relation must not produce a <validated> element");
}
}

View File

@ -0,0 +1,109 @@
{
"id": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d",
"originalId": [],
"pid": [],
"dateofcollection": "2020-01-01",
"dateoftransformation": "2020-01-01",
"extraInfo": [],
"oaiprovenance": null,
"websiteurl": {
"value": "https://web.site",
"datainfo": null
},
"code": {
"value": "79a0e",
"datainfo": null
},
"acronym": {
"value": "79a0e_acronym",
"datainfo": null
},
"title": {
"value": "79a0e_title",
"datainfo": null
},
"startdate": {
"value": "2019-02-01",
"datainfo": null
},
"enddate": {
"value": "2021-01-09",
"datainfo": null
},
"callidentifier": {
"value": "79a0e_callID",
"datainfo": null
},
"keywords": {
"value": "",
"datainfo": null
},
"duration": {
"value": "",
"datainfo": null
},
"ecsc39": {
"value": "true",
"datainfo": null
},
"oamandatepublications": {
"value": "true",
"datainfo": null
},
"ecarticle29_3": {
"value": "false",
"datainfo": null
},
"optional1": {
"value": "",
"datainfo": null
},
"optional2": {
"value": "",
"datainfo": null
},
"jsonextrainfo":{
"value": "",
"datainfo": null
},
"contactfullname":{
"value": "",
"datainfo": null
},
"contactfax": {
"value": "",
"datainfo": null
},
"contactphone": {
"value": "",
"datainfo": null
},
"contactemail": {
"value": "",
"datainfo": null
},
"summary": {
"value": "79a0e_description",
"datainfo": null
},
"currency": {
"value": "EUR",
"datainfo": null
},
"totalcost": 120000,
"fundedamount": 18000,
"h2020topiccode": "",
"h2020topicdescription": "",
"h2020classification": [],
"subjects": [
{
"value": "",
"qualifier": null,
"datainfo": null
}
],
"fundingtree": []
}

View File

@ -294,6 +294,28 @@
>rcuk________::23feba2a5ca7f6b6016bf3a45180da50</to>
<legalname>University of Delhi</legalname>
</rel>
<rel inferred="false" trust="0.9" inferenceprovenance="" provenanceaction="sysimport:crosswalk:repository">
<validated date="2021-01-01">true</validated>
<to class="isProducedBy" scheme="dnet:result_project_relations" type="project">corda_______::30c6b5ab90f30666de1d112fb93d8c77</to>
<code>227878</code>
<funding>
<funder id="ec__________::EC" shortname="EC" name="European Commission" jurisdiction="EU" />
<funding_level_0 name="FP7">ec__________::EC::FP7</funding_level_0>
<funding_level_1 name="SP2">ec__________::EC::FP7::SP2</funding_level_1>
<funding_level_2 name="ERC">ec__________::EC::FP7::SP2::ERC</funding_level_2>
</funding>
<title>Complex structure and dynamics of collective motion</title>
<acronym>COLLMOT</acronym>
<contracttype classid="ERC" classname="Support for frontier research (ERC)" schemeid="ec:FP7contractTypes" schemename="ec:FP7contractTypes" />
</rel>
<rel inferred="true" trust="0.72" inferenceprovenance="iis::document_referencedProjects" provenanceaction="iis">
<to class="isProducedBy" scheme="dnet:result_project_relations" type="project">irb_hr______::2330a1d0dac71ffbe15fbcbc807288d4</to>
<code>108-1083570-3635</code>
<funding>
<funder id="irb_hr______::MZOS" shortname="MZOS" name="Ministry of Science, Education and Sports of the Republic of Croatia (MSES)" jurisdiction="HR" />
</funding>
<title>Pentadecapeptide BPC 157 - further investigations</title>
</rel>
</rels>
<children>
<instance id="openaire____::55045bd2a65019fd8e6741a755395c8c">

View File

@ -0,0 +1,31 @@
{
"collectedfrom": [
{
"key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3",
"value": "AMS Acta",
"dataInfo": null
}
],
"dataInfo": {
"invisible": false,
"inferred": false,
"deletedbyinference": false,
"trust": "0.9",
"inferenceprovenance": "",
"provenanceaction": {
"classid": "sysimport:crosswalk:repository",
"classname": "sysimport:crosswalk:repository",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
},
"lastupdatetimestamp": 1606898557407,
"relType": "resultProject",
"subRelType": "outcome",
"relClass": "isProducedBy",
"source": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
"target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d",
"validated": false,
"validationDate": null,
"properties": []
}

View File

@ -0,0 +1,31 @@
{
"collectedfrom": [
{
"key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3",
"value": "AMS Acta",
"dataInfo": null
}
],
"dataInfo": {
"invisible": false,
"inferred": false,
"deletedbyinference": false,
"trust": "0.9",
"inferenceprovenance": "",
"provenanceaction": {
"classid": "sysimport:crosswalk:repository",
"classname": "sysimport:crosswalk:repository",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
},
"lastupdatetimestamp": 1606898557407,
"relType": "resultProject",
"subRelType": "outcome",
"relClass": "isProducedBy",
"source": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
"target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d",
"validated": true,
"validationDate": "2021-01-01",
"properties": []
}