enrichment steps #38

Merged
claudio.atzori merged 334 commits from miriam.baglioni/dnet-hadoop:master into enrichment_wfs 2020-08-11 16:40:26 +02:00
1 changed file with 7 additions and 3 deletions
Showing only changes of commit 35b7279147 - Show all commits

View File

@@ -5,7 +5,9 @@ import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
@@ -82,10 +84,12 @@ public class SparkUpdateProjectTest {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
JavaRDD<Project> tmp = sc JavaRDD<Project> tmp = sc
.textFile(workingDir.toString() + "/actionSet") .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
.map(item -> OBJECT_MAPPER.readValue(item, Project.class)); .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Project)aa.getPayload()))
;
Assertions.assertEquals(16, tmp.count()); Assertions.assertEquals(14, tmp.count());
// Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); // Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class));
// //