Minor changes: add an Organization conversion test and a datasource JSON test fixture

This commit is contained in:
miconis 2019-10-25 12:52:28 +02:00
commit 9fa5aebe9c
5 changed files with 145 additions and 50 deletions

View File

@ -206,6 +206,7 @@ public class ProtoConverter implements Serializable {
.collect(Collectors.toList()))
.setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl()))
.setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage()));
}
private static OtherResearchProducts createORP(OafProtos.Oaf oaf) {

View File

@ -1,22 +1,16 @@
package eu.dnetlib.dhp.graph;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import javax.xml.crypto.Data;
public class SparkGraphImporterJob {
@ -41,6 +35,12 @@ public class SparkGraphImporterJob {
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first();
System.out.println(body);
final JavaRDD<Organization> organization = inputRDD
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
.map(Tuple2::_2)

View File

@ -2,8 +2,11 @@ package eu.dnetlib.dhp.graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import static org.junit.Assert.*;
import org.junit.Test;
import scala.tools.nsc.doc.model.Public;
@ -12,10 +15,9 @@ public class ProtoConverterTest {
@Test
public void convertDatasourceTest() throws Exception {
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json"));
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/datasource.json"));
Oaf result = ProtoConverter.convert(json);
assertNotNull(result);
assertTrue(result instanceof Datasource);
Datasource ds = (Datasource) result;
@ -24,6 +26,25 @@ public class ProtoConverterTest {
System.out.println(ds.getId());
ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(result));
}
@Test
public void convertOrganizationTest() throws Exception {
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json"));
Oaf result = ProtoConverter.convert(json);
assertNotNull(result);
assertTrue(result instanceof Organization);
Organization ds = (Organization) result;
assertNotNull(ds.getId());
System.out.println(ds.getId());
ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(result));

View File

@ -0,0 +1,73 @@
{
"kind": "entity",
"entity": {
"type": "datasource",
"datasource": {
"metadata": {
"officialname": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
},
"englishname": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
},
"websiteurl": {
"value": "https://cris.uns.ac.rs/"
},
"accessinfopackage": [
{
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
}
],
"namespaceprefix": {
"value": "CrisUnsNoviS"
},
"datasourcetype": {
"classid": "crissystem",
"classname": "CRIS System",
"schemeid": "dnet:datasource_typologies",
"schemename": "dnet:datasource_typologies"
},
"openairecompatibility": {
"classid": "openaire-cris_1.1",
"classname": "OpenAIRE CRIS v1.1",
"schemeid": "dnet:datasourceCompatibilityLevel",
"schemename": "dnet:datasourceCompatibilityLevel"
},
"latitude": {
"value": "0.0"
},
"longitude": {
"value": "0.0"
},
"journal": {
"issnPrinted": "",
"issnOnline": "",
"issnLinking": ""
}
}
},
"originalId": [
"CRIS_UNS____::openaire"
],
"collectedfrom": [
{
"key": "",
"value": ""
}
],
"dateofcollection": "2019-04-04",
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556",
"dateoftransformation": ""
},
"dataInfo": {
"inferred": false,
"deletedbyinference": false,
"trust": "0.9",
"provenanceaction": {
"classid": "sysimport:crosswalk:entityregistry",
"classname": "sysimport:crosswalk:entityregistry",
"schemeid": "dnet:provenance_actions",
"schemename": "dnet:provenance_actions"
}
}
}

View File

@ -1,68 +1,68 @@
{
"kind": "entity",
"entity": {
"type": "datasource",
"datasource": {
"type": "organization",
"organization": {
"metadata": {
"officialname": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
"legalname": {
"value": "University of Utrecht"
},
"englishname": {
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
"eclegalbody": {
"value": "false"
},
"websiteurl": {
"value": "https://cris.uns.ac.rs/"
"eclegalperson": {
"value": "false"
},
"accessinfopackage": [
{
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
}
],
"namespaceprefix": {
"value": "CrisUnsNoviS"
"ecnonprofit": {
"value": "false"
},
"datasourcetype": {
"classid": "crissystem",
"classname": "CRIS System",
"schemeid": "dnet:datasource_typologies",
"schemename": "dnet:datasource_typologies"
"ecresearchorganization": {
"value": "false"
},
"openairecompatibility": {
"classid": "openaire-cris_1.1",
"classname": "OpenAIRE CRIS v1.1",
"schemeid": "dnet:datasourceCompatibilityLevel",
"schemename": "dnet:datasourceCompatibilityLevel"
"echighereducation": {
"value": "false"
},
"latitude": {
"value": "0.0"
"ecinternationalorganizationeurinterests": {
"value": "false"
},
"longitude": {
"value": "0.0"
"ecinternationalorganization": {
"value": "false"
},
"journal": {
"issnPrinted": "",
"issnOnline": "",
"issnLinking": ""
"ecenterprise": {
"value": "false"
},
"ecsmevalidated": {
"value": "false"
},
"ecnutscode": {
"value": "false"
},
"country": {
"classid": "FI",
"classname": "Finland",
"schemeid": "dnet:countries",
"schemename": "dnet:countries"
}
}
},
"originalId": [
"CRIS_UNS____::openaire"
"aka_________::f88cc5f874ff27f0fd6e7cb24842e9fb"
],
"collectedfrom": [
{
"key": "",
"value": ""
"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326",
"value": "Academy of Finland"
}
],
"dateofcollection": "2019-04-04",
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556",
"dateoftransformation": ""
"dateofcollection": "2018-09-28",
"id": "20|aka_________::0070a5080d7092f960fb33c8a9fca016",
"dateoftransformation": "2019-04-16"
},
"dataInfo": {
"inferred": false,
"deletedbyinference": false,
"inferred": true,
"deletedbyinference": true,
"trust": "0.9",
"inferenceprovenance": "dedup-similarity-organization-simple",
"provenanceaction": {
"classid": "sysimport:crosswalk:entityregistry",
"classname": "sysimport:crosswalk:entityregistry",