Went back to the readPath implementation we had before

This commit is contained in:
Miriam Baglioni 2020-05-08 13:08:56 +02:00
parent b2192fdcdc
commit 9a29ab7508
1 changed files with 5 additions and 4 deletions

View File

@@ -25,6 +25,7 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class SparkBulkTagJob { public class SparkBulkTagJob {
private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob.class); private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
@@ -108,12 +109,12 @@ public class SparkBulkTagJob {
.json(outputPath); .json(outputPath);
} }
private static <R> Dataset<R> readPath( public static <R> Dataset<R> readPath(
SparkSession spark, String inputEntityPath, Class<R> clazz) { SparkSession spark, String inputPath, Class<R> clazz) {
return spark return spark
.read() .read()
.json(inputEntityPath) .textFile(inputPath)
.as(Encoders.bean(clazz)); .map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
} }
} }