dump of the results related to at least one project #61

Merged
claudio.atzori merged 51 commits from miriam.baglioni/dnet-hadoop:dump into master 2020-12-09 17:22:57 +01:00
1 changed files with 5 additions and 5 deletions
Showing only changes of commit e758d5d9b4 - Show all commits

View File

@ -6,9 +6,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
@ -64,11 +64,11 @@ public class SparkResultLinkedToProject implements Serializable {
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
Utils.removeOutputDir(spark, outputPath); Utils.removeOutputDir(spark, outputPath);
writeResultsLikedToProjects(spark, inputClazz, inputPath, outputPath, relationPath); writeResultsLinkedToProjects(spark, inputClazz, inputPath, outputPath, relationPath);
}); });
} }
private static <R extends Result> void writeResultsLikedToProjects(SparkSession spark, Class<R> inputClazz, private static <R extends Result> void writeResultsLinkedToProjects(SparkSession spark, Class<R> inputClazz,
String inputPath, String outputPath, String relationPath) { String inputPath, String outputPath, String relationPath) {
Dataset<R> results = Utils Dataset<R> results = Utils
@ -76,7 +76,7 @@ public class SparkResultLinkedToProject implements Serializable {
.filter("dataInfo.deletedbyinference = false and datainfo.invisible = false"); .filter("dataInfo.deletedbyinference = false and datainfo.invisible = false");
Dataset<Relation> relations = Utils Dataset<Relation> relations = Utils
.readPath(spark, relationPath, Relation.class) .readPath(spark, relationPath, Relation.class)
.filter("dataInfo.deletedbyinference = false and lower(relClass) = 'isproducedby'"); .filter("dataInfo.deletedbyinference = false and lower(relClass) = '" + Constants.RESULT_PROJECT_IS_PRODUCED_BY.toLowerCase() + "'");
relations relations
.joinWith( .joinWith(