From 9a29ab75081aad03fee582d587aaecb02c10aa2b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 8 May 2020 13:08:56 +0200 Subject: [PATCH] got back to the readPath we have before --- .../java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 90a99926e..e62b4b4fc 100644 --- a/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-bulktag/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -25,6 +25,7 @@ import eu.dnetlib.dhp.schema.oaf.*; public class SparkBulkTagJob { private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob.class); + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -108,12 +109,12 @@ public class SparkBulkTagJob { .json(outputPath); } - private static Dataset readPath( - SparkSession spark, String inputEntityPath, Class clazz) { + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { return spark .read() - .json(inputEntityPath) - .as(Encoders.bean(clazz)); + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } }