forked from D-Net/dnet-hadoop
WIP: materialize graph as Hive DB
This commit is contained in:
parent
cce21eafc2
commit
a5f23d8a4c
|
@ -65,6 +65,8 @@ public class GraphSupport {
|
||||||
|
|
||||||
log.info("reading graph {}, format {}, class {}", graph, format, clazz);
|
log.info("reading graph {}, format {}, class {}", graph, format, clazz);
|
||||||
|
|
||||||
|
Encoder<T> encoder = Encoders.bean(clazz);
|
||||||
|
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case JSON:
|
case JSON:
|
||||||
String path = graph + "/" + clazz.getSimpleName().toLowerCase();
|
String path = graph + "/" + clazz.getSimpleName().toLowerCase();
|
||||||
|
@ -74,21 +76,18 @@ public class GraphSupport {
|
||||||
.textFile(path)
|
.textFile(path)
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
|
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
|
||||||
Encoders.bean(clazz))
|
encoder)
|
||||||
.filter((FilterFunction<T>) value -> Objects.nonNull(ModelSupport.idFn().apply(value)));
|
.filter((FilterFunction<T>) value -> Objects.nonNull(ModelSupport.idFn().apply(value)));
|
||||||
case HIVE:
|
case HIVE:
|
||||||
String table = ModelSupport.tableIdentifier(graph, clazz);
|
String table = ModelSupport.tableIdentifier(graph, clazz);
|
||||||
log.info("reading table {}", table);
|
log.info("reading table {}", table);
|
||||||
return spark.read().table(table).as(Encoders.bean(clazz));
|
|
||||||
|
return spark.table(table).as(encoder);
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException(String.format("format not managed: '%s'", format));
|
throw new IllegalStateException(String.format("format not managed: '%s'", format));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> Dataset<T> readGraphPARQUET(SparkSession spark, String graph, Class<T> clazz) {
|
|
||||||
return readGraph(spark, graph, clazz, GraphFormat.HIVE);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <T extends Oaf> Dataset<T> readGraphJSON(SparkSession spark, String graph, Class<T> clazz) {
|
public static <T extends Oaf> Dataset<T> readGraphJSON(SparkSession spark, String graph, Class<T> clazz) {
|
||||||
return readGraph(spark, graph, clazz, GraphFormat.JSON);
|
return readGraph(spark, graph, clazz, GraphFormat.JSON);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue