WIP: materialize graph as Hive DB

This commit is contained in:
Claudio Atzori 2020-08-07 15:33:07 +02:00
parent cce21eafc2
commit a5f23d8a4c
1 changed file with 5 additions and 6 deletions

View File

@ -65,6 +65,8 @@ public class GraphSupport {
log.info("reading graph {}, format {}, class {}", graph, format, clazz);
Encoder<T> encoder = Encoders.bean(clazz);
switch (format) {
case JSON:
String path = graph + "/" + clazz.getSimpleName().toLowerCase();
@ -74,21 +76,18 @@ public class GraphSupport {
.textFile(path)
.map(
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
Encoders.bean(clazz))
encoder)
.filter((FilterFunction<T>) value -> Objects.nonNull(ModelSupport.idFn().apply(value)));
case HIVE:
String table = ModelSupport.tableIdentifier(graph, clazz);
log.info("reading table {}", table);
return spark.read().table(table).as(Encoders.bean(clazz));
return spark.table(table).as(encoder);
default:
throw new IllegalStateException(String.format("format not managed: '%s'", format));
}
}
/**
 * Convenience overload of {@code readGraph} that fixes the format argument to
 * {@code GraphFormat.HIVE}.
 *
 * <p>NOTE(review): the method name mentions PARQUET while the format passed on is HIVE;
 * confirm that this pairing is intentional.
 *
 * @param spark the Spark session forwarded to {@code readGraph}
 * @param graph the graph identifier forwarded to {@code readGraph}
 * @param clazz the model class of the records to read
 * @param <T> the model type, a subtype of {@code Oaf}
 * @return the dataset produced by {@code readGraph} for the HIVE format
 */
public static <T extends Oaf> Dataset<T> readGraphPARQUET(SparkSession spark, String graph, Class<T> clazz) {
    final GraphFormat format = GraphFormat.HIVE;
    return readGraph(spark, graph, clazz, format);
}
/**
 * Convenience overload of {@code readGraph} that fixes the format argument to
 * {@code GraphFormat.JSON}.
 *
 * @param spark the Spark session forwarded to {@code readGraph}
 * @param graph the graph identifier forwarded to {@code readGraph}
 * @param clazz the model class of the records to read
 * @param <T> the model type, a subtype of {@code Oaf}
 * @return the dataset produced by {@code readGraph} for the JSON format
 */
public static <T extends Oaf> Dataset<T> readGraphJSON(SparkSession spark, String graph, Class<T> clazz) {
    final GraphFormat format = GraphFormat.JSON;
    return readGraph(spark, graph, clazz, format);
}