From 18ec8e814724c0a38596784bc4cfe6225527c1ee Mon Sep 17 00:00:00 2001 From: "sandro.labruzzo" Date: Thu, 31 Oct 2019 11:31:37 +0100 Subject: [PATCH] moved protoutils function to dhp-schemas --- .../dhp/schema/util}/ProtoConverter.java | 4 +- .../dnetlib/dhp/schema/util}/ProtoUtils.java | 2 +- dhp-workflows/dhp-graph-mapper/pom.xml | 5 ++ .../dhp/graph/SparkGraphImporterJob.java | 17 +++--- .../dnetlib/dhp/graph/ProtoConverterTest.java | 1 + .../dhp/graph/SparkGraphImporterJobTest.java | 3 +- pom.xml | 57 +++---------------- 7 files changed, 28 insertions(+), 61 deletions(-) rename {dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util}/ProtoConverter.java (99%) rename {dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util}/ProtoUtils.java (99%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util/ProtoConverter.java similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util/ProtoConverter.java index 6cf64923e6..69b2012482 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util/ProtoConverter.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.graph; +package eu.dnetlib.dhp.schema.util; import eu.dnetlib.data.proto.*; import eu.dnetlib.dhp.schema.oaf.*; @@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import java.io.Serializable; import java.util.stream.Collectors; -import static eu.dnetlib.dhp.graph.ProtoUtils.*; +import static eu.dnetlib.dhp.schema.util.ProtoUtils.*; public class ProtoConverter implements Serializable { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoUtils.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util/ProtoUtils.java similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoUtils.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util/ProtoUtils.java index 41e0baa57b..6834e217cf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoUtils.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/util/ProtoUtils.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.graph; +package eu.dnetlib.dhp.schema.util; import com.googlecode.protobuf.format.JsonFormat; import eu.dnetlib.data.proto.FieldTypeProtos; diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 4a4492ceef..b70058f8f9 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -44,6 +44,11 @@ 1.0.0-SNAPSHOT + + de.javakaffee + kryo-serializers + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java index 625b342237..573495795e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.graph; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.util.ProtoConverter; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.io.Text; @@ -49,22 +50,22 @@ public class SparkGraphImporterJob { final Encoder relationEncoder = Encoders.bean(Relation.class); - if (filter == null|| filter.toLowerCase().contains("organization")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("organization")) spark.createDataset(oafRdd.filter(s -> s instanceof Organization).map(s -> (Organization) s).rdd(), organizationEncoder).write().save(outputPath + "/organizations"); - if (filter == null|| filter.toLowerCase().contains("project")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("project")) spark.createDataset(oafRdd.filter(s -> s instanceof Project).map(s -> (Project) s).rdd(), projectEncoder).write().save(outputPath + "/projects"); - if (filter == null|| filter.toLowerCase().contains("datasource")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("datasource")) spark.createDataset(oafRdd.filter(s -> s instanceof Datasource).map(s -> (Datasource) s).rdd(), datasourceEncoder).write().save(outputPath + "/datasources"); - if (filter == null|| filter.toLowerCase().contains("dataset")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("dataset")) spark.createDataset(oafRdd.filter(s -> s instanceof eu.dnetlib.dhp.schema.oaf.Dataset).map(s -> (eu.dnetlib.dhp.schema.oaf.Dataset) s).rdd(), datasetEncoder).write().save(outputPath + "/datasets"); - if (filter == null|| filter.toLowerCase().contains("publication")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("publication")) spark.createDataset(oafRdd.filter(s -> s instanceof Publication).map(s -> (Publication) s).rdd(), publicationEncoder).write().save(outputPath + "/publications"); - if (filter == null|| filter.toLowerCase().contains("software")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("software")) spark.createDataset(oafRdd.filter(s -> s instanceof Software).map(s -> (Software) s).rdd(), softwareEncoder).write().save(outputPath + "/software"); - if (filter == null|| filter.toLowerCase().contains("otherresearchproduct")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("otherresearchproduct")) spark.createDataset(oafRdd.filter(s -> s instanceof OtherResearchProducts).map(s -> (OtherResearchProducts) s).rdd(), otherResearchProductsEncoder).write().save(outputPath + "/otherResearchProducts"); - if (filter == null|| filter.toLowerCase().contains("relation")) + if (StringUtils.isBlank(filter)|| filter.toLowerCase().contains("relation")) spark.createDataset(oafRdd.filter(s -> s instanceof Relation).map(s -> (Relation) s).rdd(), relationEncoder).write().save(outputPath + "/relations"); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java index 8afd0cda69..b8a062f919 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.graph; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.util.ProtoConverter; import org.apache.commons.io.IOUtils; import org.junit.Test; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java index ce5185b610..dd5468de46 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/SparkGraphImporterJobTest.java @@ -8,7 +8,8 @@ public class SparkGraphImporterJobTest { @Test @Ignore public void testImport() throws Exception { - SparkGraphImporterJob.main(new String[]{"-mt", "local[*]","-i", "/home/sandro/part-m-02236", "-o", "/tmp/dataframes", "-f", "software,relation"}); + SparkGraphImporterJob.main(new String[]{"-mt", "local[*]","-i", "/home/sandro/part-m-02236", "-o", "/tmp/dataframes", "-f", "publication"}); + } } diff --git a/pom.xml b/pom.xml index 31a4089233..1727e1c1cd 100644 --- a/pom.xml +++ b/pom.xml @@ -47,7 +47,6 @@ - dnet45-releases D-Net 45 releases @@ -60,22 +59,6 @@ true - - - cloudera Cloudera Repository @@ -183,6 +166,12 @@ 1.1.6 + + de.javakaffee + kryo-serializers + 0.45 + + net.schmizz sshj @@ -252,7 +241,7 @@ eu.dnetlib dnet-openaire-data-protos - 3.9.5-proto250 + 3.9.5 @@ -326,30 +315,6 @@ true - - - org.apache.maven.plugins maven-javadoc-plugin @@ -369,10 +334,8 @@ build-helper-maven-plugin 1.12 - - org.apache.maven.plugins @@ -415,10 +378,7 @@ 2.10 - - - dnet45-snapshots @@ -431,7 +391,6 @@ http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases - @@ -456,7 +415,7 @@ 2.9.6 3.5 2.11.8 - 2.5.0 + 2.4.1