diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java index 600663730..a300d92c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java @@ -206,6 +206,7 @@ public class ProtoConverter implements Serializable { .collect(Collectors.toList())) .setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl())) .setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage())); + } private static OtherResearchProducts createORP(OafProtos.Oaf oaf) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java index cdce67628..2b9c1f60b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java @@ -1,22 +1,16 @@ package eu.dnetlib.dhp.graph; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Organization; -import eu.dnetlib.dhp.schema.oaf.Publication; import org.apache.hadoop.io.Text; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import scala.Tuple2; -import javax.xml.crypto.Data; - public class SparkGraphImporterJob { @@ -41,6 +35,12 @@ public class SparkGraphImporterJob { .map(item -> new Tuple2<>(item._1.toString(), item._2.toString())); + + final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first(); + + System.out.println(body); + + final JavaRDD organization = inputRDD .filter(s -> s._1().split("@")[2].equalsIgnoreCase("body")) .map(Tuple2::_2) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java index 4f68ca446..f16ab5ccb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java @@ -2,8 +2,11 @@ package eu.dnetlib.dhp.graph; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.*; + import org.apache.commons.io.IOUtils; + import static org.junit.Assert.*; + import org.junit.Test; import scala.tools.nsc.doc.model.Public; @@ -12,10 +15,9 @@ public class ProtoConverterTest { @Test public void convertDatasourceTest() throws Exception { - final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json")); + final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/datasource.json")); Oaf result = ProtoConverter.convert(json); - assertNotNull(result); assertTrue(result instanceof Datasource); Datasource ds = (Datasource) result; @@ -24,6 +26,25 @@ public class ProtoConverterTest { System.out.println(ds.getId()); + ObjectMapper mapper = new ObjectMapper(); + System.out.println(mapper.writeValueAsString(result)); + } + + + @Test + public void convertOrganizationTest() throws Exception { + + final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json")); + + Oaf result = ProtoConverter.convert(json); + assertNotNull(result); + assertTrue(result instanceof Organization); + Organization ds = (Organization) result; + assertNotNull(ds.getId()); + + System.out.println(ds.getId()); + + ObjectMapper mapper = new ObjectMapper(); System.out.println(mapper.writeValueAsString(result)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/datasource.json new file mode 100644 index 000000000..e2c382c0a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/datasource.json @@ -0,0 +1,73 @@ +{ + "kind": "entity", + "entity": { + "type": "datasource", + "datasource": { + "metadata": { + "officialname": { + "value": "CRIS UNS (Current Research Information System University of Novi Sad)" + }, + "englishname": { + "value": "CRIS UNS (Current Research Information System University of Novi Sad)" + }, + "websiteurl": { + "value": "https://cris.uns.ac.rs/" + }, + "accessinfopackage": [ + { + "value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS" + } + ], + "namespaceprefix": { + "value": "CrisUnsNoviS" + }, + "datasourcetype": { + "classid": "crissystem", + "classname": "CRIS System", + "schemeid": "dnet:datasource_typologies", + "schemename": "dnet:datasource_typologies" + }, + "openairecompatibility": { + "classid": "openaire-cris_1.1", + "classname": "OpenAIRE CRIS v1.1", + "schemeid": "dnet:datasourceCompatibilityLevel", + "schemename": "dnet:datasourceCompatibilityLevel" + }, + "latitude": { + "value": "0.0" + }, + "longitude": { + "value": "0.0" + }, + "journal": { + "issnPrinted": "", + "issnOnline": "", + "issnLinking": "" + } + } + }, + "originalId": [ + "CRIS_UNS____::openaire" + ], + "collectedfrom": [ + { + "key": "", + "value": "" + } + ], + "dateofcollection": "2019-04-04", + "id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556", + "dateoftransformation": "" + }, + "dataInfo": { + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenance_actions", + "schemename": "dnet:provenance_actions" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/organization.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/organization.json index e2c382c0a..1f8ab1950 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/organization.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/organization.json @@ -1,68 +1,68 @@ { "kind": "entity", "entity": { - "type": "datasource", - "datasource": { + "type": "organization", + "organization": { "metadata": { - "officialname": { - "value": "CRIS UNS (Current Research Information System University of Novi Sad)" + "legalname": { + "value": "University of Utrecht" }, - "englishname": { - "value": "CRIS UNS (Current Research Information System University of Novi Sad)" + "eclegalbody": { + "value": "false" }, - "websiteurl": { - "value": "https://cris.uns.ac.rs/" + "eclegalperson": { + "value": "false" }, - "accessinfopackage": [ - { - "value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS" - } - ], - "namespaceprefix": { - "value": "CrisUnsNoviS" + "ecnonprofit": { + "value": "false" }, - "datasourcetype": { - "classid": "crissystem", - "classname": "CRIS System", - "schemeid": "dnet:datasource_typologies", - "schemename": "dnet:datasource_typologies" + "ecresearchorganization": { + "value": "false" }, - "openairecompatibility": { - "classid": "openaire-cris_1.1", - "classname": "OpenAIRE CRIS v1.1", - "schemeid": "dnet:datasourceCompatibilityLevel", - "schemename": "dnet:datasourceCompatibilityLevel" + "echighereducation": { + "value": "false" }, - "latitude": { - "value": "0.0" + "ecinternationalorganizationeurinterests": { + "value": "false" }, - "longitude": { - "value": "0.0" + "ecinternationalorganization": { + "value": "false" }, - "journal": { - "issnPrinted": "", - "issnOnline": "", - "issnLinking": "" + "ecenterprise": { + "value": "false" + }, + "ecsmevalidated": { + "value": "false" + }, + "ecnutscode": { + "value": "false" + }, + "country": { + "classid": "FI", + "classname": "Finland", + "schemeid": "dnet:countries", + "schemename": "dnet:countries" } } }, "originalId": [ - "CRIS_UNS____::openaire" + "aka_________::f88cc5f874ff27f0fd6e7cb24842e9fb" ], "collectedfrom": [ { - "key": "", - "value": "" + "key": "10|openaire____::6ac933301a3933c8a22ceebea7000326", + "value": "Academy of Finland" } ], - "dateofcollection": "2019-04-04", - "id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556", - "dateoftransformation": "" + "dateofcollection": "2018-09-28", + "id": "20|aka_________::0070a5080d7092f960fb33c8a9fca016", + "dateoftransformation": "2019-04-16" }, "dataInfo": { - "inferred": false, - "deletedbyinference": false, + "inferred": true, + "deletedbyinference": true, "trust": "0.9", + "inferenceprovenance": "dedup-similarity-organization-simple", "provenanceaction": { "classid": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry", @@ -70,4 +70,4 @@ "schemename": "dnet:provenance_actions" } } -} \ No newline at end of file +}