forked from antonis.lempesis/dnet-hadoop
minor changes
This commit is contained in:
commit
9fa5aebe9c
|
@ -206,6 +206,7 @@ public class ProtoConverter implements Serializable {
|
|||
.collect(Collectors.toList()))
|
||||
.setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl()))
|
||||
.setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage()));
|
||||
|
||||
}
|
||||
|
||||
private static OtherResearchProducts createORP(OafProtos.Oaf oaf) {
|
||||
|
|
|
@ -1,22 +1,16 @@
|
|||
package eu.dnetlib.dhp.graph;
|
||||
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.PairFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoder;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import scala.Tuple2;
|
||||
|
||||
import javax.xml.crypto.Data;
|
||||
|
||||
public class SparkGraphImporterJob {
|
||||
|
||||
|
||||
|
@ -41,6 +35,12 @@ public class SparkGraphImporterJob {
|
|||
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
|
||||
|
||||
|
||||
|
||||
final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first();
|
||||
|
||||
System.out.println(body);
|
||||
|
||||
|
||||
final JavaRDD<Organization> organization = inputRDD
|
||||
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
|
||||
.map(Tuple2::_2)
|
||||
|
|
|
@ -2,8 +2,11 @@ package eu.dnetlib.dhp.graph;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import org.junit.Test;
|
||||
import scala.tools.nsc.doc.model.Public;
|
||||
|
||||
|
@ -12,10 +15,9 @@ public class ProtoConverterTest {
|
|||
|
||||
@Test
|
||||
public void convertDatasourceTest() throws Exception {
|
||||
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json"));
|
||||
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/datasource.json"));
|
||||
|
||||
Oaf result = ProtoConverter.convert(json);
|
||||
|
||||
assertNotNull(result);
|
||||
assertTrue(result instanceof Datasource);
|
||||
Datasource ds = (Datasource) result;
|
||||
|
@ -24,6 +26,25 @@ public class ProtoConverterTest {
|
|||
System.out.println(ds.getId());
|
||||
|
||||
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
System.out.println(mapper.writeValueAsString(result));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void convertOrganizationTest() throws Exception {
|
||||
|
||||
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/organization.json"));
|
||||
|
||||
Oaf result = ProtoConverter.convert(json);
|
||||
assertNotNull(result);
|
||||
assertTrue(result instanceof Organization);
|
||||
Organization ds = (Organization) result;
|
||||
assertNotNull(ds.getId());
|
||||
|
||||
System.out.println(ds.getId());
|
||||
|
||||
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
System.out.println(mapper.writeValueAsString(result));
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
{
|
||||
"kind": "entity",
|
||||
"entity": {
|
||||
"type": "datasource",
|
||||
"datasource": {
|
||||
"metadata": {
|
||||
"officialname": {
|
||||
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
||||
},
|
||||
"englishname": {
|
||||
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
||||
},
|
||||
"websiteurl": {
|
||||
"value": "https://cris.uns.ac.rs/"
|
||||
},
|
||||
"accessinfopackage": [
|
||||
{
|
||||
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
|
||||
}
|
||||
],
|
||||
"namespaceprefix": {
|
||||
"value": "CrisUnsNoviS"
|
||||
},
|
||||
"datasourcetype": {
|
||||
"classid": "crissystem",
|
||||
"classname": "CRIS System",
|
||||
"schemeid": "dnet:datasource_typologies",
|
||||
"schemename": "dnet:datasource_typologies"
|
||||
},
|
||||
"openairecompatibility": {
|
||||
"classid": "openaire-cris_1.1",
|
||||
"classname": "OpenAIRE CRIS v1.1",
|
||||
"schemeid": "dnet:datasourceCompatibilityLevel",
|
||||
"schemename": "dnet:datasourceCompatibilityLevel"
|
||||
},
|
||||
"latitude": {
|
||||
"value": "0.0"
|
||||
},
|
||||
"longitude": {
|
||||
"value": "0.0"
|
||||
},
|
||||
"journal": {
|
||||
"issnPrinted": "",
|
||||
"issnOnline": "",
|
||||
"issnLinking": ""
|
||||
}
|
||||
}
|
||||
},
|
||||
"originalId": [
|
||||
"CRIS_UNS____::openaire"
|
||||
],
|
||||
"collectedfrom": [
|
||||
{
|
||||
"key": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"dateofcollection": "2019-04-04",
|
||||
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556",
|
||||
"dateoftransformation": ""
|
||||
},
|
||||
"dataInfo": {
|
||||
"inferred": false,
|
||||
"deletedbyinference": false,
|
||||
"trust": "0.9",
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:entityregistry",
|
||||
"classname": "sysimport:crosswalk:entityregistry",
|
||||
"schemeid": "dnet:provenance_actions",
|
||||
"schemename": "dnet:provenance_actions"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,68 +1,68 @@
|
|||
{
|
||||
"kind": "entity",
|
||||
"entity": {
|
||||
"type": "datasource",
|
||||
"datasource": {
|
||||
"type": "organization",
|
||||
"organization": {
|
||||
"metadata": {
|
||||
"officialname": {
|
||||
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
||||
"legalname": {
|
||||
"value": "University of Utrecht"
|
||||
},
|
||||
"englishname": {
|
||||
"value": "CRIS UNS (Current Research Information System University of Novi Sad)"
|
||||
"eclegalbody": {
|
||||
"value": "false"
|
||||
},
|
||||
"websiteurl": {
|
||||
"value": "https://cris.uns.ac.rs/"
|
||||
"eclegalperson": {
|
||||
"value": "false"
|
||||
},
|
||||
"accessinfopackage": [
|
||||
{
|
||||
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS"
|
||||
}
|
||||
],
|
||||
"namespaceprefix": {
|
||||
"value": "CrisUnsNoviS"
|
||||
"ecnonprofit": {
|
||||
"value": "false"
|
||||
},
|
||||
"datasourcetype": {
|
||||
"classid": "crissystem",
|
||||
"classname": "CRIS System",
|
||||
"schemeid": "dnet:datasource_typologies",
|
||||
"schemename": "dnet:datasource_typologies"
|
||||
"ecresearchorganization": {
|
||||
"value": "false"
|
||||
},
|
||||
"openairecompatibility": {
|
||||
"classid": "openaire-cris_1.1",
|
||||
"classname": "OpenAIRE CRIS v1.1",
|
||||
"schemeid": "dnet:datasourceCompatibilityLevel",
|
||||
"schemename": "dnet:datasourceCompatibilityLevel"
|
||||
"echighereducation": {
|
||||
"value": "false"
|
||||
},
|
||||
"latitude": {
|
||||
"value": "0.0"
|
||||
"ecinternationalorganizationeurinterests": {
|
||||
"value": "false"
|
||||
},
|
||||
"longitude": {
|
||||
"value": "0.0"
|
||||
"ecinternationalorganization": {
|
||||
"value": "false"
|
||||
},
|
||||
"journal": {
|
||||
"issnPrinted": "",
|
||||
"issnOnline": "",
|
||||
"issnLinking": ""
|
||||
"ecenterprise": {
|
||||
"value": "false"
|
||||
},
|
||||
"ecsmevalidated": {
|
||||
"value": "false"
|
||||
},
|
||||
"ecnutscode": {
|
||||
"value": "false"
|
||||
},
|
||||
"country": {
|
||||
"classid": "FI",
|
||||
"classname": "Finland",
|
||||
"schemeid": "dnet:countries",
|
||||
"schemename": "dnet:countries"
|
||||
}
|
||||
}
|
||||
},
|
||||
"originalId": [
|
||||
"CRIS_UNS____::openaire"
|
||||
"aka_________::f88cc5f874ff27f0fd6e7cb24842e9fb"
|
||||
],
|
||||
"collectedfrom": [
|
||||
{
|
||||
"key": "",
|
||||
"value": ""
|
||||
"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326",
|
||||
"value": "Academy of Finland"
|
||||
}
|
||||
],
|
||||
"dateofcollection": "2019-04-04",
|
||||
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556",
|
||||
"dateoftransformation": ""
|
||||
"dateofcollection": "2018-09-28",
|
||||
"id": "20|aka_________::0070a5080d7092f960fb33c8a9fca016",
|
||||
"dateoftransformation": "2019-04-16"
|
||||
},
|
||||
"dataInfo": {
|
||||
"inferred": false,
|
||||
"deletedbyinference": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": true,
|
||||
"trust": "0.9",
|
||||
"inferenceprovenance": "dedup-similarity-organization-simple",
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:entityregistry",
|
||||
"classname": "sysimport:crosswalk:entityregistry",
|
||||
|
|
Loading…
Reference in New Issue