BrBETA_dnet-hadoop/dhp-workflows/dhp-workflows-common/src/main/java/eu/dnetlib/dhp/common/GraphSupport.java

36 lines
980 B
Java
Raw Normal View History

2020-07-28 14:59:14 +02:00
package eu.dnetlib.dhp.common;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Oaf;
2020-07-28 14:59:14 +02:00
public class GraphSupport {
private static final Logger log = LoggerFactory.getLogger(GraphSupport.class);
2020-07-28 14:59:14 +02:00
private static <T extends Oaf> void saveGraphTable(Dataset<T> dataset, Class<T> clazz, String outputGraph,
eu.dnetlib.dhp.common.SaveMode saveMode) {
2020-07-28 14:59:14 +02:00
log.info("saving graph in {} mode to {}", outputGraph, saveMode.toString());
2020-07-28 14:59:14 +02:00
final DataFrameWriter<T> writer = dataset.write().mode(SaveMode.Overwrite);
switch (saveMode) {
case JSON:
writer.option("compression", "gzip").json(outputGraph);
break;
case PARQUET:
final String db_table = ModelSupport.tableIdentifier(outputGraph, clazz);
writer.saveAsTable(db_table);
break;
}
2020-07-28 14:59:14 +02:00
}
2020-07-28 14:59:14 +02:00
}