From c5739c4266d03e452af6dad33b4f362c4960536e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 22 Dec 2021 15:08:33 +0100 Subject: [PATCH] [BipFinder] create action set for the measures at the level of the result --- .../bipfinder/SparkAtomicActionScoreJob.java | 6 +-- .../{bipfinder => common}/BipDeserialize.java | 4 +- .../{bipfinder => common}/BipScore.java | 4 +- .../{bipfinder => common}/Constants.java | 2 +- .../createunresolvedentities/Constants.java | 49 ------------------- .../PrepareBipFinder.java | 5 +- .../PrepareFOSSparkJob.java | 2 +- .../SparkSaveUnresolved.java | 2 +- 8 files changed, 14 insertions(+), 60 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => common}/BipDeserialize.java (83%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => common}/BipScore.java (84%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/{bipfinder => common}/Constants.java (96%) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index 8cda19d07..f3f1db222 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -16,16 +16,16 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.common.BipDeserialize; +import eu.dnetlib.dhp.actionmanager.common.BipScore; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java similarity index 83% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java index d5b2ced7c..d29d3bdde 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipDeserialize.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipDeserialize.java @@ -1,11 +1,13 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.common; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import eu.dnetlib.dhp.actionmanager.bipfinder.Score; + /** * Class that maps the model of the bipFinder! input data. * Only needed for deserialization purposes diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java similarity index 84% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java index 247546694..6bfce2c0f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/BipScore.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/BipScore.java @@ -1,9 +1,11 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.common; import java.io.Serializable; import java.util.List; +import eu.dnetlib.dhp.actionmanager.bipfinder.Score; + /** * Rewriting of the bipFinder input data by extracting the identifier of the result (doi) */ diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java index 0e08fd06c..e91d41438 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/Constants.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/common/Constants.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.bipfinder; +package eu.dnetlib.dhp.actionmanager.common; import java.util.Optional; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java deleted file mode 100644 index c508d4dbc..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/Constants.java +++ /dev/null @@ -1,49 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.createunresolvedentities; - -import java.util.Optional; - -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class Constants { - - public static final String DOI = "doi"; - - public static final String UPDATE_DATA_INFO_TYPE = "update"; - public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos"; - public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE"; - public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip"; - - public static final String FOS_CLASS_ID = "FOS"; - public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification"; - - public static final String NULL = "NULL"; - - public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private Constants() { - } - - public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) { - return Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - } - - public static Dataset readPath( - SparkSession spark, String inputPath, Class clazz) { - return spark - .read() - .textFile(inputPath) - .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java index 3d68db27b..c850012c7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareBipFinder.java @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.UPDATE_CLASS_NAME; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.UPDATE_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -11,7 +11,6 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.hdfs.client.HdfsUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 5ae2f8c88..5f750b025 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java index 62b813602..08e63ee1e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/SparkSaveUnresolved.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.actionmanager.createunresolvedentities; -import static eu.dnetlib.dhp.actionmanager.createunresolvedentities.Constants.*; +import static eu.dnetlib.dhp.actionmanager.common.Constants.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable;