master #11

Manually merged
claudio.atzori merged 275 commits from :master into enrichment_wfs 2020-05-11 15:14:56 +02:00
1 changed file with 1 addition and 55 deletions
Showing only changes of commit 95a54d5460


@@ -50,16 +50,6 @@ public class SparkCountryPropagationJob2 {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String resultType =
resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
log.info("resultType: {}", resultType);
final Boolean writeUpdates =
Optional.ofNullable(parser.get("writeUpdate"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("writeUpdate: {}", writeUpdates);
final Boolean saveGraph =
Optional.ofNullable(parser.get("saveGraph"))
.map(Boolean::valueOf)
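
The flags above are read with the same Optional idiom: a missing argument yields null, map(Boolean::valueOf) is skipped, and the orElse default applies. A minimal, runnable sketch of the pattern, with a hypothetical get(name) standing in for the job's argument parser:

    import java.util.Map;
    import java.util.Optional;

    public class FlagParsingSketch {

        // Hypothetical stand-in for the job's argument parser: returns the
        // raw value, or null when the argument was not supplied.
        private static final Map<String, String> ARGS = Map.of("saveGraph", "false");

        private static String get(String name) {
            return ARGS.get(name);
        }

        public static void main(String[] unused) {
            // Missing argument -> map() is skipped, the TRUE default applies.
            Boolean writeUpdates = Optional.ofNullable(get("writeUpdate"))
                    .map(Boolean::valueOf)
                    .orElse(Boolean.TRUE);
            // Supplied argument -> parsed from its string form.
            Boolean saveGraph = Optional.ofNullable(get("saveGraph"))
                    .map(Boolean::valueOf)
                    .orElse(Boolean.TRUE);
            System.out.println(writeUpdates + " " + saveGraph); // true false
        }
    }
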
@@ -76,17 +66,12 @@ public class SparkCountryPropagationJob2 {
conf,
isSparkSessionManaged,
spark -> {
// createOutputDirs(outputPath,
// FileSystem.get(spark.sparkContext().hadoopConfiguration()));
removeOutputDir(spark, outputPath);
execPropagation(
spark,
datasourcecountrypath,
inputPath,
outputPath,
resultClazz,
resultType,
writeUpdates,
saveGraph);
});
}
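
Inside the managed-session lambda, removeOutputDir replaces the commented-out createOutputDirs call: the target path is cleared before propagation writes to it. A sketch of what a helper of this shape typically does, using Hadoop's FileSystem API (the project's actual helper may differ):

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.spark.sql.SparkSession;

    class OutputDirSketch {
        // Recursively delete the output directory so reruns do not fail on
        // an existing path; delete() returns false if the path is absent.
        static void removeOutputDir(SparkSession spark, String outputPath)
                throws java.io.IOException {
            FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
            fs.delete(new Path(outputPath), true);
        }
    }
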
@@ -97,12 +82,10 @@ public class SparkCountryPropagationJob2 {
String inputPath,
String outputPath,
Class<R> resultClazz,
String resultType,
boolean writeUpdates,
boolean saveGraph) {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
// Load parque file with preprocessed association datasource - country
// Load file with preprocessed association datasource - country
Dataset<DatasourceCountry> datasourcecountryassoc =
readAssocDatasourceCountry(spark, datasourcecountrypath);
// broadcasting the result of the preparation step
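
The comment above refers to wrapping the prepared datasource-country dataset in a Spark broadcast variable, matching the Broadcast&lt;Dataset&lt;DatasourceCountry&gt;&gt; type that appears further down. A sketch of that step; note it broadcasts the Dataset handle (its logical plan), not materialized rows:

    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.broadcast.Broadcast;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.SparkSession;

    class BroadcastSketch {
        // Ship the prepared association dataset to the helpers below as a
        // broadcast variable, as the comment in the hunk above describes.
        static <T> Broadcast<Dataset<T>> broadcastAssoc(SparkSession spark, Dataset<T> assoc) {
            JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
            return sc.broadcast(assoc);
        }
    }
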
@@ -114,10 +97,6 @@ public class SparkCountryPropagationJob2 {
spark, inputPath, resultClazz, broadcast_datasourcecountryassoc)
.as(Encoders.bean(ResultCountrySet.class));
if (writeUpdates) {
writeUpdates(potentialUpdates, outputPath + "/update_" + resultType);
}
if (saveGraph) {
updateResultTable(spark, potentialUpdates, inputPath, resultClazz, outputPath);
}
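
Just above, .as(Encoders.bean(ResultCountrySet.class)) re-types the untyped rows returned by getPotentialResultToUpdate into the project's ResultCountrySet bean. The cast in isolation, assuming the Row columns match the bean's properties:

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.Row;

    class TypedCastSketch {
        // Re-type a Dataset<Row> whose columns match the bean's properties;
        // this changes the encoder only, no shuffle is involved.
        static Dataset<ResultCountrySet> toTyped(Dataset<Row> potentialUpdateRows) {
            return potentialUpdateRows.as(Encoders.bean(ResultCountrySet.class));
        }
    }
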
@@ -138,11 +117,6 @@ public class SparkCountryPropagationJob2 {
r -> new Tuple2<>(r.getId(), r),
Encoders.tuple(Encoders.STRING(), Encoders.bean(resultClazz)));
// Dataset<Tuple2<String, ResultCountrySet>> potential_update_pair =
// potentialUpdates.map(pu -> new Tuple2<>(pu.getResultId(),
// pu),
// Encoders.tuple(Encoders.STRING(), Encoders.bean(ResultCountrySet.class)));
Dataset<R> new_table =
result_pair
.joinWith(
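
The hunk above keys each result row by its identifier before joining it with the potential updates. A sketch of the pairing and the join; the left outer join and the resultId column (visible in the deleted comment) are assumptions here, and Result and ResultCountrySet are the project's beans:

    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import scala.Tuple2;

    class JoinSketch {
        // Pair each result with its id, then left-outer join against the
        // updates so results without a country update pass through unchanged.
        static <R extends Result> Dataset<Tuple2<Tuple2<String, R>, ResultCountrySet>> pairAndJoin(
                Dataset<R> result, Dataset<ResultCountrySet> potentialUpdates, Class<R> resultClazz) {
            Dataset<Tuple2<String, R>> result_pair = result.map(
                    (MapFunction<R, Tuple2<String, R>>) r -> new Tuple2<>(r.getId(), r),
                    Encoders.tuple(Encoders.STRING(), Encoders.bean(resultClazz)));
            return result_pair.joinWith(
                    potentialUpdates,
                    result_pair.col("_1").equalTo(potentialUpdates.col("resultId")),
                    "left_outer");
        }
    }
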
@@ -184,10 +158,6 @@ public class SparkCountryPropagationJob2 {
log.info("Saving graph table to path: {}", outputPath);
// log.info("number of saved recordsa: {}", new_table.count());
new_table.toJSON().write().option("compression", "gzip").text(outputPath);
// .toJavaRDD()
// .map(r -> OBJECT_MAPPER.writeValueAsString(r))
// .saveAsTextFile(outputPath , GzipCodec.class);
}
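
The save step serializes each row to a JSON line and writes gzip-compressed text, replacing the commented-out RDD-based variant. The chain in isolation; SaveMode.Overwrite is borrowed from the deleted writeUpdates helper at the bottom of the diff and is not shown for this particular call:

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.SaveMode;

    class SaveSketch {
        // One JSON document per line, gzip-compressed, overwriting any
        // previous output at the same path.
        static <T> void saveAsGzipJson(Dataset<T> table, String outputPath) {
            table.toJSON()
                    .write()
                    .mode(SaveMode.Overwrite)
                    .option("compression", "gzip")
                    .text(outputPath);
        }
    }
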
private static <R extends Result> Dataset<Row> getPotentialResultToUpdate(
@@ -203,18 +173,6 @@ public class SparkCountryPropagationJob2 {
return countryPropagationAssoc(spark, broadcast_datasourcecountryassoc);
}
// private static void createCfHbforresult(SparkSession spark) {
// String query;
// query = "SELECT id, inst.collectedfrom.key cf , inst.hostedby.key hb " +
// "FROM ( SELECT id, instance " +
// "FROM result " +
// " WHERE datainfo.deletedbyinference = false) ds " +
// "LATERAL VIEW EXPLODE(instance) i AS inst";
// Dataset<Row> cfhb = spark.sql(query);
// cfhb.createOrReplaceTempView("cfhb");
// //log.info("cfhb_number : {}", cfhb.count());
// }
private static Dataset<Row> countryPropagationAssoc(
SparkSession spark,
Broadcast<Dataset<DatasourceCountry>> broadcast_datasourcecountryassoc) {
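
The deleted comment block preserved a Spark SQL helper that flattened each result's instance array into (id, collectedfrom, hostedby) rows via LATERAL VIEW EXPLODE. A runnable sketch of the same query, assuming a result temp view with the referenced fields is already registered:

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    class CfHbSketch {
        // Explode the instance array so every (collectedfrom, hostedby) pair
        // becomes its own row, and expose the result as the "cfhb" view.
        static void createCfHbForResult(SparkSession spark) {
            String query =
                    "SELECT id, inst.collectedfrom.key cf, inst.hostedby.key hb "
                            + "FROM (SELECT id, instance "
                            + "      FROM result "
                            + "      WHERE datainfo.deletedbyinference = false) ds "
                            + "LATERAL VIEW EXPLODE(instance) i AS inst";
            Dataset<Row> cfhb = spark.sql(query);
            cfhb.createOrReplaceTempView("cfhb");
        }
    }
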
@@ -248,16 +206,4 @@ public class SparkCountryPropagationJob2 {
value -> OBJECT_MAPPER.readValue(value, DatasourceCountry.class),
Encoders.bean(DatasourceCountry.class));
}
private static void writeUpdates(
Dataset<ResultCountrySet> potentialUpdates, String outputPath) {
potentialUpdates
.toJSON()
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.text(outputPath);
// map(u -> OBJECT_MAPPER.writeValueAsString(u))
// .saveAsTextFile(outputPath, GzipCodec.class);
}
}
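
readAssocDatasourceCountry, visible in the last hunk, reads the preparation step's output as text and deserializes each JSON line into a DatasourceCountry bean. A sketch of the whole method under that reading, with OBJECT_MAPPER a Jackson ObjectMapper and DatasourceCountry the project's bean:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SparkSession;

    class ReadAssocSketch {
        private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

        // One JSON document per input line -> one DatasourceCountry row.
        static Dataset<DatasourceCountry> readAssocDatasourceCountry(
                SparkSession spark, String relationPath) {
            return spark.read()
                    .textFile(relationPath)
                    .map((MapFunction<String, DatasourceCountry>) value ->
                                    OBJECT_MAPPER.readValue(value, DatasourceCountry.class),
                            Encoders.bean(DatasourceCountry.class));
        }
    }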