forked from D-Net/dnet-hadoop
minor changes
This commit is contained in:
commit
9fa5aebe9c
|
@ -206,6 +206,7 @@ public class ProtoConverter implements Serializable {
|
||||||
.collect(Collectors.toList()))
|
.collect(Collectors.toList()))
|
||||||
.setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl()))
|
.setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl()))
|
||||||
.setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage()));
|
.setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage()));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static OtherResearchProducts createORP(OafProtos.Oaf oaf) {
|
private static OtherResearchProducts createORP(OafProtos.Oaf oaf) {
|
||||||
|
|
|
@ -1,22 +1,16 @@
|
||||||
package eu.dnetlib.dhp.graph;
|
package eu.dnetlib.dhp.graph;
|
||||||
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.api.java.function.PairFunction;
|
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoder;
|
import org.apache.spark.sql.Encoder;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import javax.xml.crypto.Data;
|
|
||||||
|
|
||||||
public class SparkGraphImporterJob {
|
public class SparkGraphImporterJob {
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,6 +35,12 @@ public class SparkGraphImporterJob {
|
||||||
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
|
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first();
|
||||||
|
|
||||||
|
System.out.println(body);
|
||||||
|
|
||||||
|
|
||||||
final JavaRDD<Organization> organization = inputRDD
|
final JavaRDD<Organization> organization = inputRDD
|
||||||
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
|
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
|
||||||
.map(Tuple2::_2)
|
.map(Tuple2::_2)
|
||||||
|
|
|
@ -2,8 +2,11 @@ package eu.dnetlib.dhp.graph;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import scala.tools.nsc.doc.model.Public;
|
import scala.tools.nsc.doc.model.Public;
|
||||||
|
|
||||||
|
@ -12,10 +15,9 @@ public class ProtoConverterTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void convertDatasourceTest() throws Exception {
|
public void convertDatasourceTest() throws Exception {
|
||||||
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json"));
|
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/datasource.json"));
|
||||||
|
|
||||||
Oaf result = ProtoConverter.convert(json);
|
Oaf result = ProtoConverter.convert(json);
|
||||||
|
|
||||||
assertNotNull(result);
|
assertNotNull(result);
|
||||||
assertTrue(result instanceof Datasource);
|
assertTrue(result instanceof Datasource);
|
||||||
Datasource ds = (Datasource) result;
|
Datasource ds = (Datasource) result;
|
||||||
|
@ -24,6 +26,25 @@ public class ProtoConverterTest {
|
||||||
System.out.println(ds.getId());
|
System.out.println(ds.getId());
|
||||||
|
|
||||||
|
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
System.out.println(mapper.writeValueAsString(result));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void convertOrganizationTest() throws Exception {
|
||||||
|
|
||||||
|
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json"));
|
||||||
|
|
||||||
|
Oaf result = ProtoConverter.convert(json);
|
||||||
|
assertNotNull(result);
|
||||||
|
assertTrue(result instanceof Organization);
|
||||||
|
Organization ds = (Organization) result;
|
||||||
|
assertNotNull(ds.getId());
|
||||||
|
|
||||||
|
System.out.println(ds.getId());
|
||||||
|
|
||||||
|
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
System.out.println(mapper.writeValueAsString(result));
|
System.out.println(mapper.writeValueAsString(result));
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
{
|
||||||
|
"kind": "entity",
|
||||||
|
"entity": {
|
||||||
|
"type": "datasource",
|
||||||
|
"datasource": {
|
||||||
|
"metadata": {
|
||||||
|
"officialname": {
|
||||||
|
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
||||||
|
},
|
||||||
|
"englishname": {
|
||||||
|
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
||||||
|
},
|
||||||
|
"websiteurl": {
|
||||||
|
"value": "https://cris.uns.ac.rs/"
|
||||||
|
},
|
||||||
|
"accessinfopackage": [
|
||||||
|
{
|
||||||
|
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"namespaceprefix": {
|
||||||
|
"value": "CrisUnsNoviS"
|
||||||
|
},
|
||||||
|
"datasourcetype": {
|
||||||
|
"classid": "crissystem",
|
||||||
|
"classname": "CRIS System",
|
||||||
|
"schemeid": "dnet:datasource_typologies",
|
||||||
|
"schemename": "dnet:datasource_typologies"
|
||||||
|
},
|
||||||
|
"openairecompatibility": {
|
||||||
|
"classid": "openaire-cris_1.1",
|
||||||
|
"classname": "OpenAIRE CRIS v1.1",
|
||||||
|
"schemeid": "dnet:datasourceCompatibilityLevel",
|
||||||
|
"schemename": "dnet:datasourceCompatibilityLevel"
|
||||||
|
},
|
||||||
|
"latitude": {
|
||||||
|
"value": "0.0"
|
||||||
|
},
|
||||||
|
"longitude": {
|
||||||
|
"value": "0.0"
|
||||||
|
},
|
||||||
|
"journal": {
|
||||||
|
"issnPrinted": "",
|
||||||
|
"issnOnline": "",
|
||||||
|
"issnLinking": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"originalId": [
|
||||||
|
"CRIS_UNS____::openaire"
|
||||||
|
],
|
||||||
|
"collectedfrom": [
|
||||||
|
{
|
||||||
|
"key": "",
|
||||||
|
"value": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"dateofcollection": "2019-04-04",
|
||||||
|
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556",
|
||||||
|
"dateoftransformation": ""
|
||||||
|
},
|
||||||
|
"dataInfo": {
|
||||||
|
"inferred": false,
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"trust": "0.9",
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:entityregistry",
|
||||||
|
"classname": "sysimport:crosswalk:entityregistry",
|
||||||
|
"schemeid": "dnet:provenance_actions",
|
||||||
|
"schemename": "dnet:provenance_actions"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,68 +1,68 @@
|
||||||
{
|
{
|
||||||
"kind": "entity",
|
"kind": "entity",
|
||||||
"entity": {
|
"entity": {
|
||||||
"type": "datasource",
|
"type": "organization",
|
||||||
"datasource": {
|
"organization": {
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"officialname": {
|
"legalname": {
|
||||||
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
"value": "University of Utrecht"
|
||||||
},
|
},
|
||||||
"englishname": {
|
"eclegalbody": {
|
||||||
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
"value": "false"
|
||||||
},
|
},
|
||||||
"websiteurl": {
|
"eclegalperson": {
|
||||||
"value": "https://cris.uns.ac.rs/"
|
"value": "false"
|
||||||
},
|
},
|
||||||
"accessinfopackage": [
|
"ecnonprofit": {
|
||||||
{
|
"value": "false"
|
||||||
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"namespaceprefix": {
|
|
||||||
"value": "CrisUnsNoviS"
|
|
||||||
},
|
},
|
||||||
"datasourcetype": {
|
"ecresearchorganization": {
|
||||||
"classid": "crissystem",
|
"value": "false"
|
||||||
"classname": "CRIS System",
|
|
||||||
"schemeid": "dnet:datasource_typologies",
|
|
||||||
"schemename": "dnet:datasource_typologies"
|
|
||||||
},
|
},
|
||||||
"openairecompatibility": {
|
"echighereducation": {
|
||||||
"classid": "openaire-cris_1.1",
|
"value": "false"
|
||||||
"classname": "OpenAIRE CRIS v1.1",
|
|
||||||
"schemeid": "dnet:datasourceCompatibilityLevel",
|
|
||||||
"schemename": "dnet:datasourceCompatibilityLevel"
|
|
||||||
},
|
},
|
||||||
"latitude": {
|
"ecinternationalorganizationeurinterests": {
|
||||||
"value": "0.0"
|
"value": "false"
|
||||||
},
|
},
|
||||||
"longitude": {
|
"ecinternationalorganization": {
|
||||||
"value": "0.0"
|
"value": "false"
|
||||||
},
|
},
|
||||||
"journal": {
|
"ecenterprise": {
|
||||||
"issnPrinted": "",
|
"value": "false"
|
||||||
"issnOnline": "",
|
},
|
||||||
"issnLinking": ""
|
"ecsmevalidated": {
|
||||||
|
"value": "false"
|
||||||
|
},
|
||||||
|
"ecnutscode": {
|
||||||
|
"value": "false"
|
||||||
|
},
|
||||||
|
"country": {
|
||||||
|
"classid": "FI",
|
||||||
|
"classname": "Finland",
|
||||||
|
"schemeid": "dnet:countries",
|
||||||
|
"schemename": "dnet:countries"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"originalId": [
|
"originalId": [
|
||||||
"CRIS_UNS____::openaire"
|
"aka_________::f88cc5f874ff27f0fd6e7cb24842e9fb"
|
||||||
],
|
],
|
||||||
"collectedfrom": [
|
"collectedfrom": [
|
||||||
{
|
{
|
||||||
"key": "",
|
"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326",
|
||||||
"value": ""
|
"value": "Academy of Finland"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"dateofcollection": "2019-04-04",
|
"dateofcollection": "2018-09-28",
|
||||||
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556",
|
"id": "20|aka_________::0070a5080d7092f960fb33c8a9fca016",
|
||||||
"dateoftransformation": ""
|
"dateoftransformation": "2019-04-16"
|
||||||
},
|
},
|
||||||
"dataInfo": {
|
"dataInfo": {
|
||||||
"inferred": false,
|
"inferred": true,
|
||||||
"deletedbyinference": false,
|
"deletedbyinference": true,
|
||||||
"trust": "0.9",
|
"trust": "0.9",
|
||||||
|
"inferenceprovenance": "dedup-similarity-organization-simple",
|
||||||
"provenanceaction": {
|
"provenanceaction": {
|
||||||
"classid": "sysimport:crosswalk:entityregistry",
|
"classid": "sysimport:crosswalk:entityregistry",
|
||||||
"classname": "sysimport:crosswalk:entityregistry",
|
"classname": "sysimport:crosswalk:entityregistry",
|
||||||
|
@ -70,4 +70,4 @@
|
||||||
"schemename": "dnet:provenance_actions"
|
"schemename": "dnet:provenance_actions"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue