minor changes

This commit is contained in:
miconis 2019-10-25 12:52:28 +02:00
commit 9fa5aebe9c
5 changed files with 145 additions and 50 deletions

View File

@ -206,6 +206,7 @@ public class ProtoConverter implements Serializable {
.collect(Collectors.toList())) .collect(Collectors.toList()))
.setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl())) .setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl()))
.setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage())); .setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage()));
} }
private static OtherResearchProducts createORP(OafProtos.Oaf oaf) { private static OtherResearchProducts createORP(OafProtos.Oaf oaf) {

View File

@ -1,22 +1,16 @@
package eu.dnetlib.dhp.graph; package eu.dnetlib.dhp.graph;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import scala.Tuple2; import scala.Tuple2;
import javax.xml.crypto.Data;
public class SparkGraphImporterJob { public class SparkGraphImporterJob {
@ -41,6 +35,12 @@ public class SparkGraphImporterJob {
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString())); .map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first();
System.out.println(body);
final JavaRDD<Organization> organization = inputRDD final JavaRDD<Organization> organization = inputRDD
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body")) .filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
.map(Tuple2::_2) .map(Tuple2::_2)

View File

@ -2,8 +2,11 @@ package eu.dnetlib.dhp.graph;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import org.junit.Test; import org.junit.Test;
import scala.tools.nsc.doc.model.Public; import scala.tools.nsc.doc.model.Public;
@ -12,10 +15,9 @@ public class ProtoConverterTest {
@Test @Test
public void convertDatasourceTest() throws Exception { public void convertDatasourceTest() throws Exception {
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json")); final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/datasource.json"));
Oaf result = ProtoConverter.convert(json); Oaf result = ProtoConverter.convert(json);
assertNotNull(result); assertNotNull(result);
assertTrue(result instanceof Datasource); assertTrue(result instanceof Datasource);
Datasource ds = (Datasource) result; Datasource ds = (Datasource) result;
@ -24,6 +26,25 @@ public class ProtoConverterTest {
System.out.println(ds.getId()); System.out.println(ds.getId());
ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(result));
}
@Test
public void convertOrganizationTest() throws Exception {
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json"));
Oaf result = ProtoConverter.convert(json);
assertNotNull(result);
assertTrue(result instanceof Organization);
Organization ds = (Organization) result;
assertNotNull(ds.getId());
System.out.println(ds.getId());
ObjectMapper mapper = new ObjectMapper(); ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(result)); System.out.println(mapper.writeValueAsString(result));

View File

@ -0,0 +1,73 @@
{
"kind": "entity",
"entity": {
"type": "datasource",
"datasource": {
"metadata": {
"officialname": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
},
"englishname": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
},
"websiteurl": {
"value": "https://cris.uns.ac.rs/"
},
"accessinfopackage": [
{
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
}
],
"namespaceprefix": {
"value": "CrisUnsNoviS"
},
"datasourcetype": {
"classid": "crissystem",
"classname": "CRIS System",
"schemeid": "dnet:datasource_typologies",
"schemename": "dnet:datasource_typologies"
},
"openairecompatibility": {
"classid": "openaire-cris_1.1",
"classname": "OpenAIRE CRIS v1.1",
"schemeid": "dnet:datasourceCompatibilityLevel",
"schemename": "dnet:datasourceCompatibilityLevel"
},
"latitude": {
"value": "0.0"
},
"longitude": {
"value": "0.0"
},
"journal": {
"issnPrinted": "",
"issnOnline": "",
"issnLinking": ""
}
}
},
"originalId": [
"CRIS_UNS____::openaire"
],
"collectedfrom": [
{
"key": "",
"value": ""
}
],
"dateofcollection": "2019-04-04",
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556",
"dateoftransformation": ""
},
"dataInfo": {
"inferred": false,
"deletedbyinference": false,
"trust": "0.9",
"provenanceaction": {
"classid": "sysimport:crosswalk:entityregistry",
"classname": "sysimport:crosswalk:entityregistry",
"schemeid": "dnet:provenance_actions",
"schemename": "dnet:provenance_actions"
}
}
}

View File

@ -1,68 +1,68 @@
{ {
"kind": "entity", "kind": "entity",
"entity": { "entity": {
"type": "datasource", "type": "organization",
"datasource": { "organization": {
"metadata": { "metadata": {
"officialname": { "legalname": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)" "value": "University of Utrecht"
}, },
"englishname": { "eclegalbody": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)" "value": "false"
}, },
"websiteurl": { "eclegalperson": {
"value": "https://cris.uns.ac.rs/" "value": "false"
}, },
"accessinfopackage": [ "ecnonprofit": {
{ "value": "false"
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
}
],
"namespaceprefix": {
"value": "CrisUnsNoviS"
}, },
"datasourcetype": { "ecresearchorganization": {
"classid": "crissystem", "value": "false"
"classname": "CRIS System",
"schemeid": "dnet:datasource_typologies",
"schemename": "dnet:datasource_typologies"
}, },
"openairecompatibility": { "echighereducation": {
"classid": "openaire-cris_1.1", "value": "false"
"classname": "OpenAIRE CRIS v1.1",
"schemeid": "dnet:datasourceCompatibilityLevel",
"schemename": "dnet:datasourceCompatibilityLevel"
}, },
"latitude": { "ecinternationalorganizationeurinterests": {
"value": "0.0" "value": "false"
}, },
"longitude": { "ecinternationalorganization": {
"value": "0.0" "value": "false"
}, },
"journal": { "ecenterprise": {
"issnPrinted": "", "value": "false"
"issnOnline": "", },
"issnLinking": "" "ecsmevalidated": {
"value": "false"
},
"ecnutscode": {
"value": "false"
},
"country": {
"classid": "FI",
"classname": "Finland",
"schemeid": "dnet:countries",
"schemename": "dnet:countries"
} }
} }
}, },
"originalId": [ "originalId": [
"CRIS_UNS____::openaire" "aka_________::f88cc5f874ff27f0fd6e7cb24842e9fb"
], ],
"collectedfrom": [ "collectedfrom": [
{ {
"key": "", "key": "10|openaire____::6ac933301a3933c8a22ceebea7000326",
"value": "" "value": "Academy of Finland"
} }
], ],
"dateofcollection": "2019-04-04", "dateofcollection": "2018-09-28",
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556", "id": "20|aka_________::0070a5080d7092f960fb33c8a9fca016",
"dateoftransformation": "" "dateoftransformation": "2019-04-16"
}, },
"dataInfo": { "dataInfo": {
"inferred": false, "inferred": true,
"deletedbyinference": false, "deletedbyinference": true,
"trust": "0.9", "trust": "0.9",
"inferenceprovenance": "dedup-similarity-organization-simple",
"provenanceaction": { "provenanceaction": {
"classid": "sysimport:crosswalk:entityregistry", "classid": "sysimport:crosswalk:entityregistry",
"classname": "sysimport:crosswalk:entityregistry", "classname": "sysimport:crosswalk:entityregistry",