From 26d2ad6ebbb1e5c1a4377a09ad478b8c819770eb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 7 Aug 2020 17:41:56 +0200 Subject: [PATCH] refactoring --- .../dhp/oa/graph/dump/graph/Extractor.java | 61 +++---------------- 1 file changed, 10 insertions(+), 51 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Extractor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Extractor.java index 940975893..6a1ad9a40 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Extractor.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Extractor.java @@ -8,47 +8,22 @@ import java.util.*; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import com.google.gson.Gson; - -import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.zenodo.Community; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.graph.Node; import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType; import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Result; public class Extractor implements Serializable { -// public void run(Boolean isSparkSessionManaged, -// String inputPath, -// String outputPath, -// Class inputClazz, -// String communityMapPath) { -// -// SparkConf conf = new SparkConf(); -// -// runWithSparkSession( -// conf, -// isSparkSessionManaged, -// spark -> { -// Utils.removeOutputDir(spark, outputPath); -// extractRelationResult( -// spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath)); -// }); -// } public void run(Boolean isSparkSessionManaged, String inputPath, @@ -68,30 +43,6 @@ public class Extractor implements Serializable { }); } -// private static void extractRelationProjects(SparkSession spark, String inputPath, String outputPath){ -// Utils.readPath(spark, inputPath, Project.class) -// .flatMap((FlatMapFunction) project ->{ -// List relList = new ArrayList<>(); -// Optional.ofNullable(project.getCollectedfrom()) -// .ifPresent(cfl -> -// cfl.forEach(cf -> { -// Provenance provenance = Provenance.newInstance(cf.getDataInfo().getProvenanceaction().getClassname(), -// cf.getDataInfo().getTrust()); -// -// relList.add(getRelation(project.getId(), cf.getKey(), -// Constants.PROJECT_ENTITY, Constants.DATASOURCE_ENTITY, Constants.IS_FUNDED_BY, -// Constants.FUNDINGS, provenance)); -// relList.add(getRelation(cf.getKey(), project.getId(), -// Constants.DATASOURCE_ENTITY, Constants.PROJECT_ENTITY, Constants.FUNDS, -// Constants.FUNDINGS, provenance)); -// })); -// return relList.iterator(); -// }, Encoders.bean(Relation.class)) -// .write() -// .option("Compression", "gzip") -// .mode(SaveMode.Append) -// .json(outputPath); -// } private void extractRelationResult(SparkSession spark, String inputPath, @@ -192,8 +143,16 @@ public class Extractor implements Serializable { .newInstance( paction.getClassid(), dinfo.getTrust())) - .orElse(Provenance.newInstance(Constants.HARVESTED, Constants.DEFAULT_TRUST))) - .orElse(Provenance.newInstance(Constants.HARVESTED, Constants.DEFAULT_TRUST)); + .orElse( + Provenance + .newInstance( + eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED, + eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST))) + .orElse( + Provenance + .newInstance( + eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED, + eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)); Relation r = getRelation( value.getId(), cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,