From 159388f9c220e33810d50c7a4552ac640c78eb92 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Mon, 16 Oct 2023 11:26:07 +0200
Subject: [PATCH] testing and fix some issues

---
 .../main/java/eu/dnetlib/dhp/api/Utils.java   |   7 +
 .../dnetlib/dhp/bulktag/SparkBulkTagJob.java  |  11 +-
 .../community/QueryInformationSystem.java     |  34 --
 ...kResultToCommunityFromOrganizationJob.java |  50 +--
 .../PrepareResultCommunitySet.java            |  15 +-
 .../SparkResultToCommunityFromProject.java    |  13 +-
 .../input_communitytoresult_parameters.json   |  13 +-
 ...t_preparecommunitytoresult_parameters.json |  12 +-
 .../oozie_app/workflow.xml                    | 115 +------
 .../input_communitytoresult_parameters.json   |  28 ++
 ...t_preparecommunitytoresult_parameters.json |  33 ++
 .../QueryCommunityAPITest.java                |  16 +-
 .../PrepareAssocTest.java                     |  95 ++++++
 .../PrepareAssocTest.java                     |  88 +++++
 .../ResultToCommunityJobTest.java             | 323 ++++++++++++++++++
 .../preparedInfo/resultCommunityList.json     |  36 ++
 .../preparedInfo/resultCommunityList.json.gz  | Bin 939 -> 0 bytes
 .../relation/relation                         |  20 ++
 .../preparedInfo/resultcommunitylist          |   4 +
 .../relation/relation                         |  20 ++
 .../sample/dataset                            |  10 +
 .../sample/otherresearchproduct               |   0
 .../sample/publication                        |   0
 .../sample/software                           |   0
 24 files changed, 736 insertions(+), 207 deletions(-)
 delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java
 create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json
 create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json
 rename dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/{bulktag => api}/QueryCommunityAPITest.java (90%)
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareAssocTest.java
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareAssocTest.java
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/ResultToCommunityJobTest.java
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json
 delete mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/relation/relation
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo/resultcommunitylist
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/relation/relation
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/dataset
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/otherresearchproduct
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/publication
 create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromproject/sample/software
diff --git
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index a0eacb774..43d5e7e98 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -11,6 +11,9 @@ import java.util.stream.Collectors; import javax.management.Query; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import com.amazonaws.util.StringUtils; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; @@ -21,6 +24,7 @@ import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; import eu.dnetlib.dhp.bulktag.community.Provider; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; +import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; /** * @author miriam.baglioni @@ -30,6 +34,8 @@ public class Utils implements Serializable { private static final ObjectMapper MAPPER = new ObjectMapper(); private static final VerbResolver resolver = VerbResolverFactory.newInstance(); + private static final Logger log = LoggerFactory.getLogger(Utils.class); + public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException { final Map communities = Maps.newHashMap(); List validCommunities = new ArrayList<>(); @@ -126,6 +132,7 @@ public class Utils implements Serializable { throw new RuntimeException(e); } }); + return organizationMap; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index b24ee129a..68c740dd5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -95,10 +95,7 @@ public class SparkBulkTagJob { Dataset datasources = readPath( spark, inputPath - .substring( - 0, - inputPath.lastIndexOf("/")) - + "/datasource", + + "datasource", Datasource.class) .filter((FilterFunction) ds -> isOKDatasource(ds)) .map((MapFunction) ds -> ds.getId(), Encoders.STRING()); @@ -106,10 +103,10 @@ public class SparkBulkTagJob { Map>> dsm = cc.getEoscDatasourceMap(); for (String ds : datasources.collectAsList()) { - final String dsId = ds.substring(3); - if (!dsm.containsKey(dsId)) { + // final String dsId = ds.substring(3); + if (!dsm.containsKey(ds)) { ArrayList> eoscList = new ArrayList<>(); - dsm.put(dsId, eoscList); + dsm.put(ds, eoscList); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java deleted file mode 100644 index 5fe3cf81f..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.bulktag.community; - -import java.io.IOException; -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.dom4j.DocumentException; -import org.xml.sax.SAXException; - -import com.google.common.base.Joiner; - -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class 
QueryInformationSystem { - - public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException, SAXException, IOException { - ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - final List res = isLookUp - .quickSearchProfile( - IOUtils - .toString( - QueryInformationSystem.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/query.xq"))); - - final String xmlConf = "" + Joiner.on(" ").join(res) + ""; - - return CommunityConfigurationFactory.newInstance(xmlConf); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 43f425b68..fe79f1be1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.resulttocommunityfromorganization; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; import java.util.Arrays; @@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -53,22 +54,15 @@ public class SparkResultToCommunityFromOrganizationJob { final String possibleupdatespath = parser.get("preparedInfoPath"); log.info("preparedInfoPath: {}", possibleupdatespath); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - @SuppressWarnings("unchecked") - Class resultClazz = (Class) Class.forName(resultClassName); - SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + runWithSparkSession( conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); + // removeOutputDir(spark, outputPath); - execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath); + execPropagation(spark, inputPath, outputPath, possibleupdatespath); }); } @@ -77,22 +71,32 @@ public class SparkResultToCommunityFromOrganizationJob { SparkSession spark, String inputPath, String outputPath, - Class resultClazz, String possibleUpdatesPath) { Dataset possibleUpdates = readPath(spark, possibleUpdatesPath, ResultCommunityList.class); - Dataset result = readPath(spark, inputPath, resultClazz); - result - .joinWith( - possibleUpdates, - result.col("id").equalTo(possibleUpdates.col("resultId")), - "left_outer") - .map(resultCommunityFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + ModelSupport.entityTypes + .keySet() + .parallelStream() + .forEach(e -> { + if (ModelSupport.isResult(e)) { + Class resultClazz = ModelSupport.entityTypes.get(e); + removeOutputDir(spark, outputPath + e.name()); + Dataset result = readPath(spark, 
inputPath + e.name(), resultClazz); + + result + .joinWith( + possibleUpdates, + result.col("id").equalTo(possibleUpdates.col("resultId")), + "left_outer") + .map(resultCommunityFn(), Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + } + }); + } private static MapFunction, R> resultCommunityFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index b0fbf8056..883f5ca86 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.resulttocommunityfromproject; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; @@ -51,16 +52,15 @@ public class PrepareResultCommunitySet { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final boolean production = Boolean.valueOf(parser.get("outputPath")); + final boolean production = Boolean.valueOf(parser.get("production")); log.info("production: {}", production); final CommunityEntityMap projectsMap = Utils.getCommunityProjects(production); log.info("projectsMap: {}", new Gson().toJson(projectsMap)); SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + runWithSparkSession( conf, isSparkSessionManaged, spark -> { @@ -94,24 +94,27 @@ public class PrepareResultCommunitySet { .select( new Column("source").as("resultId"), new Column("target").as("projectId")) - .groupByKey((MapFunction) r -> (String) r.getAs("source"), Encoders.STRING()) + .groupByKey((MapFunction) r -> (String) r.getAs("resultId"), Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, v) -> { ResultProjectList rpl = new ResultProjectList(); rpl.setResultId(k); ArrayList cl = new ArrayList<>(); - cl.addAll(projectMap.get(v.next().getAs("target"))); + cl.addAll(projectMap.get(v.next().getAs("projectId"))); v.forEachRemaining(r -> { projectMap - .get(r.getAs("target")) + .get(r.getAs("projectId")) .forEach(c -> { if (!cl.contains(c)) cl.add(c); }); }); + if(cl.size() == 0) + return null; rpl.setCommunityList(cl); return rpl; }, Encoders.bean(ResultProjectList.class)) + .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 6d4779ea1..daef6a317 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.resulttocommunityfromproject; import static eu.dnetlib.dhp.PropagationConstant.*; import static 
eu.dnetlib.dhp.PropagationConstant.PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.ArrayList; @@ -61,20 +62,14 @@ public class SparkResultToCommunityFromProject implements Serializable { final String possibleupdatespath = parser.get("preparedInfoPath"); log.info("preparedInfoPath: {}", possibleupdatespath); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - @SuppressWarnings("unchecked") - Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + + runWithSparkSession( conf, isSparkSessionManaged, spark -> { -// removeOutputDir(spark, outputPath); execPropagation(spark, inputPath, outputPath, possibleupdatespath); @@ -108,7 +103,7 @@ public class SparkResultToCommunityFromProject implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath); + .json(outputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json index b6eb309a5..0db8085d1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json @@ -5,12 +5,7 @@ "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, + { "paramName": "out", "paramLongName": "outputPath", @@ -23,12 +18,6 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, { "paramName": "p", "paramLongName": "preparedInfoPath", diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json index 8df509abf..8b6291e5d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json @@ -5,12 +5,6 @@ "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, - { - "paramName":"ocm", - "paramLongName":"organizationtoresultcommunitymap", - "paramDescription": "the map for the association organization communities", - "paramRequired": true - }, { "paramName":"h", "paramLongName":"hive_metastore_uris", @@ -28,6 +22,12 @@ "paramLongName": "outputPath", 
"paramDescription": "the path used to store temporary output files", "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "production", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml index b25822ad0..55490e25c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -22,7 +22,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -90,8 +90,8 @@ eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=4 + --executor-memory=10G --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -103,20 +103,13 @@ --sourcePath${sourcePath}/relation --outputPath${workingDir}/preparedInfo/resultCommunityList --hive_metastore_uris${hive_metastore_uris} - + --production${production} - + - - - - - - - - + yarn cluster @@ -135,104 +128,14 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/publication - --outputPath${outputPath}/publication - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --saveGraph${saveGraph} + --sourcePath${sourcePath}/ + --outputPath${outputPath}/ - + - - - yarn - cluster - community2resultfromorganization-Dataset - eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/dataset - --outputPath${outputPath}/dataset - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --saveGraph${saveGraph} - - - - - - - yarn - cluster - community2resultfromorganization-ORP - eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - 
--conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/otherresearchproduct - --outputPath${outputPath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --saveGraph${saveGraph} - - - - - - - - yarn - cluster - community2resultfromorganization-Software - eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/software - --outputPath${outputPath}/software - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --saveGraph${saveGraph} - - - - - - diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json new file mode 100644 index 000000000..0db8085d1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json @@ -0,0 +1,28 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json new file mode 100644 index 000000000..8b6291e5d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json @@ -0,0 +1,33 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false 
otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "production", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/QueryCommunityAPITest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java similarity index 90% rename from dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/QueryCommunityAPITest.java rename to dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java index a0083dab8..6ee01a6f0 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/QueryCommunityAPITest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.bulktag; +package eu.dnetlib.dhp.api; import java.util.List; @@ -98,14 +98,22 @@ public class QueryCommunityAPITest { @Test void getCommunityProjects() throws Exception { CommunityEntityMap projectMap = Utils.getCommunityProjects(true); - Assertions.assertFalse(projectMap.containsKey("mes")); - Assertions.assertEquals(33, projectMap.size()); + Assertions .assertTrue( projectMap .keySet() .stream() - .allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|")))); + .allMatch(k -> k.startsWith("40|"))); + + System.out.println(projectMap); + } + + @Test + void getCommunityOrganizations() throws Exception { + CommunityEntityMap organizationMap = Utils.getCommunityOrganization(true); + Assertions.assertTrue(organizationMap.keySet().stream().allMatch(k -> k.startsWith("20|"))); + } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareAssocTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareAssocTest.java new file mode 100644 index 000000000..6536ecc85 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareAssocTest.java @@ -0,0 +1,95 @@ + +package eu.dnetlib.dhp.resulttocommunityfromorganization; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.api.Utils; +import eu.dnetlib.dhp.api.model.CommunityEntityMap; +import eu.dnetlib.dhp.bulktag.BulkTagJobTest; +import eu.dnetlib.dhp.bulktag.SparkBulkTagJob; +import eu.dnetlib.dhp.schema.oaf.Dataset; + +/** + * @author miriam.baglioni + * @Date 13/10/23 + */ +public class PrepareAssocTest { + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(PrepareAssocTest.class); 
+ + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(BulkTagJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareAssocTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void test1() throws Exception { + + PrepareResultCommunitySet + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/relation/").getPath(), + "-outputPath", workingDir.toString() + "/prepared", + "-production", Boolean.TRUE.toString(), + "-hive_metastore_uris", "" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/prepared") + .map(item -> new ObjectMapper().readValue(item, ResultCommunityList.class)); + + tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareAssocTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareAssocTest.java new file mode 100644 index 000000000..0e10b3edf --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareAssocTest.java @@ -0,0 +1,88 @@ + +package eu.dnetlib.dhp.resulttocommunityfromproject; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.bulktag.BulkTagJobTest; +import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; + +/** + * @author miriam.baglioni + * @Date 13/10/23 + */ +public class PrepareAssocTest { + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(PrepareAssocTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(BulkTagJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(BulkTagJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + 
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareAssocTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void test1() throws Exception { + + PrepareResultCommunitySet + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/relation/").getPath(), + "-outputPath", workingDir.toString() + "/prepared", + "-production", Boolean.TRUE.toString(), + "-hive_metastore_uris", "" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/prepared") + .map(item -> new ObjectMapper().readValue(item, ResultProjectList.class)); + + tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/ResultToCommunityJobTest.java new file mode 100644 index 000000000..6a5726cbe --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromproject/ResultToCommunityJobTest.java @@ -0,0 +1,323 @@ + +package eu.dnetlib.dhp.resulttocommunityfromproject; + +import static org.apache.spark.sql.functions.desc; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest; +import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; +import eu.dnetlib.dhp.schema.oaf.Dataset; + +public class ResultToCommunityJobTest { + + private static final Logger log = LoggerFactory.getLogger(ResultToCommunityJobTest.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(ResultToCommunityJobTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ResultToCommunityJobTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(OrcidPropagationJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + 
@AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testSparkResultToCommunityFromProjectJob() throws Exception { + final String preparedInfoPath = getClass() + .getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/preparedInfo") + .getPath(); + SparkResultToCommunityFromProject + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", getClass() + .getResource("/eu/dnetlib/dhp/resulttocommunityfromproject/sample/") + .getPath(), + + "-outputPath", workingDir.toString() + "/", + "-preparedInfoPath", preparedInfoPath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + tmp.foreach(d -> System.out.println(new ObjectMapper().writeValueAsString(d))); +// Assertions.assertEquals(10, tmp.count()); +// org.apache.spark.sql.Dataset verificationDataset = spark +// .createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); +// +// verificationDataset.createOrReplaceTempView("dataset"); +// +// String query = "select id, MyT.id community " +// + "from dataset " +// + "lateral view explode(context) c as MyT " +// + "lateral view explode(MyT.datainfo) d as MyD " +// + "where MyD.inferenceprovenance = 'propagation'"; +// +// org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); +// Assertions.assertEquals(5, resultExplodedProvenance.count()); +// Assertions +// .assertEquals( +// 0, +// resultExplodedProvenance +// .filter("id = '50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe'") +// .count()); +// Assertions +// .assertEquals( +// 1, +// resultExplodedProvenance +// .filter("id = '50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b'") +// .count()); +// Assertions +// .assertEquals( +// "beopen", +// resultExplodedProvenance +// .select("community") +// .where( +// resultExplodedProvenance +// .col("id") +// .equalTo( +// "50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b")) +// .collectAsList() +// .get(0) +// .getString(0)); +// +// Assertions +// .assertEquals( +// 2, +// resultExplodedProvenance +// .filter("id = '50|od________18::8887b1df8b563c4ea851eb9c882c9d7b'") +// .count()); +// Assertions +// .assertEquals( +// "mes", +// resultExplodedProvenance +// .select("community") +// .where( +// resultExplodedProvenance +// .col("id") +// .equalTo( +// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b")) +// .sort(desc("community")) +// .collectAsList() +// .get(0) +// .getString(0)); +// Assertions +// .assertEquals( +// "euromarine", +// resultExplodedProvenance +// .select("community") +// .where( +// resultExplodedProvenance +// .col("id") +// .equalTo( +// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b")) +// .sort(desc("community")) +// .collectAsList() +// .get(1) +// .getString(0)); +// +// Assertions +// .assertEquals( +// 1, +// resultExplodedProvenance +// .filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'") +// .count()); +// Assertions +// .assertEquals( +// "mes", +// resultExplodedProvenance +// .select("community") +// .where( +// resultExplodedProvenance +// .col("id") +// .equalTo( +// "50|doajarticles::8d817039a63710fcf97e30f14662c6c8")) +// .sort(desc("community")) +// .collectAsList() +// .get(0) +// .getString(0)); +// +// Assertions +// .assertEquals( +// 1, +// resultExplodedProvenance +// .filter("id = 
'50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6'") +// .count()); +// Assertions +// .assertEquals( +// "mes", +// resultExplodedProvenance +// .select("community") +// .where( +// resultExplodedProvenance +// .col("id") +// .equalTo( +// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6")) +// .sort(desc("community")) +// .collectAsList() +// .get(0) +// .getString(0)); +// +// query = "select id, MyT.id community " +// + "from dataset " +// + "lateral view explode(context) c as MyT " +// + "lateral view explode(MyT.datainfo) d as MyD "; +// +// org.apache.spark.sql.Dataset resultCommunityId = spark.sql(query); +// +// Assertions.assertEquals(10, resultCommunityId.count()); +// +// Assertions +// .assertEquals( +// 1, +// resultCommunityId +// .filter("id = '50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe'") +// .count()); +// Assertions +// .assertEquals( +// "beopen", +// resultCommunityId +// .select("community") +// .where( +// resultCommunityId +// .col("id") +// .equalTo( +// "50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe")) +// .collectAsList() +// .get(0) +// .getString(0)); +// +// Assertions +// .assertEquals( +// 1, +// resultCommunityId +// .filter("id = '50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b'") +// .count()); +// +// Assertions +// .assertEquals( +// 3, +// resultCommunityId +// .filter("id = '50|od________18::8887b1df8b563c4ea851eb9c882c9d7b'") +// .count()); +// Assertions +// .assertEquals( +// "beopen", +// resultCommunityId +// .select("community") +// .where( +// resultCommunityId +// .col("id") +// .equalTo( +// "50|od________18::8887b1df8b563c4ea851eb9c882c9d7b")) +// .sort(desc("community")) +// .collectAsList() +// .get(2) +// .getString(0)); +// +// Assertions +// .assertEquals( +// 2, +// resultCommunityId +// .filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'") +// .count()); +// Assertions +// .assertEquals( +// "euromarine", +// resultCommunityId +// .select("community") +// .where( +// resultCommunityId +// .col("id") +// .equalTo( +// "50|doajarticles::8d817039a63710fcf97e30f14662c6c8")) +// .sort(desc("community")) +// .collectAsList() +// .get(1) +// .getString(0)); +// +// Assertions +// .assertEquals( +// 3, +// resultCommunityId +// .filter("id = '50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6'") +// .count()); +// Assertions +// .assertEquals( +// "euromarine", +// resultCommunityId +// .select("community") +// .where( +// resultCommunityId +// .col("id") +// .equalTo( +// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6")) +// .sort(desc("community")) +// .collectAsList() +// .get(2) +// .getString(0)); +// Assertions +// .assertEquals( +// "ni", +// resultCommunityId +// .select("community") +// .where( +// resultCommunityId +// .col("id") +// .equalTo( +// "50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6")) +// .sort(desc("community")) +// .collectAsList() +// .get(0) +// .getString(0)); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json new file mode 100644 index 000000000..bd4674c4f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json @@ -0,0 +1,36 @@ +{"communityList":["beopen"],"resultId":"50|dedup_wf_001::afaf128022d29872c4dad402b2db04fe"} 
+{"communityList":["beopen"],"resultId":"50|dedup_wf_001::3f62cfc27024d564ea86760c494ba93b"} +{"communityList":["euromarine","mes"],"resultId":"50|od________18::8887b1df8b563c4ea851eb9c882c9d7b"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::8d817039a63710fcf97e30f14662c6c8"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::3c98f0632f1875b4979e552ba3aa01e6"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::53b70ea6e0769d02ddf93307ec8e3e92"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::ef1ac6efc10f420fa9e190e49644f1f2"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::b738aa93950dddfb0294df2e8fdf0579"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::befccb1f9e6b833fd82e587737ae9e7d"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::bf1cba621615e27db1692865a5f35a0b"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::e105de571b336daae05f0e75cf740c5c"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::1fe4f347c9df657b7ba520987d79436e"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::6d7c00a8c8e59f0215459e2e4ee3fd6c"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::39ec88ef4127db0ea1b88938f1c52889"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::3496709db804d98f76c45d7ed023dd95"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::dc97fffbdb6d35f792fc0ab428ff065c"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::b61d082d96619d9b7a876e6dce44cf65"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::32a96881c3036cf2d2165bb2d276ea82"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::76e9e6a959ba588483c74ec580369864"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::1487a0a92572376d95d6cc3f066504b7"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::413a0a0656f888cce9c15f6be6df60e3"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::25c3e91960cbd7a8f95a2e511cbffddd"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::4d523b52094a689856e479bb99063c7a"} +{"communityList":["euromarine","mes"],"resultId":"50|od______2663::393c7262bb71642b7bb4c67cfeab02c5"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::0b3333d875b91ffa4db0735efec94e7a"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::0699c30043edfae40786d80acd20d300"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::7f29ade677e66ffbf1312fa837bc73ca"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::7ba6627ac7590d367cc01bbac4d518e8"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::8ad9bc047433401947dc0cdb4a989cee"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::cce70f27d85df658479d0ec0046a4eb3"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::74304be834b7013dbaeb73c3a19a654b"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::46fc13a87befb6a83ac9c63580528ab0"} +{"communityList":["euromarine","mes"],"resultId":"50|doajarticles::66f1867488b62d9c9fb734273775e203"} +{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::79236995d5c30e5234a47cee4a728cae"} +{"communityList":["euromarine","mes"],"resultId":"50|od______2386::cb7f6cb01d1a835612731d645842f699"} 
+{"communityList":["euromarine","mes"],"resultId":"50|dedup_wf_001::d424daa43f97a434eb0a12289410cade"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo/resultCommunityList.json.gz deleted file mode 100644 index 8b452d0e17e06baf8f3ac6d62277c2a08d88a13c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 939 zcmV;c162GUiwFqE4x3&819D|^b!>D)Z*6ULZfSIROlfm;E^2dcZUC)UNs44g4Bh(} zx$cD`LZP8@l!t}WKNm@oi~mx3dP9D_`Sa7$m-mmK zfBoU{)92g6kGGT0AM*X}r#H8c`RU8M&)>)G;kMzo$oTT%?JxBfU_3lnS&b<$XG|5I zj$wf?&x`|DdHeR=*L$D6il&!OhKaEW(}bA7QSq3r^Uu%DFCU+u?BnD6yuG~3&%s|TnnNK`39&|9gmQR^^)cy&=s6Bd#i*H5QLtIB+1TkZ{EX71BWqR2%q`4|Ej2kV zLaE`$Xbu`XC&wA2Q)t_+wGHmSqYzhtV#169)z=f?4T2SrFGW$MVAci z#cRb&=If|3Y@6Y=v(gpw{#pKlLaMD5z?D&!2rLu=<}o4q`)eGAYpO*Oa}u@F)L3Jd z$>QU@{Qg;9qjXTxWOLryKfW1p80T2`*F7Zf?_;kvCf4TVfQ)M)3`GtHkAtS7p7*EN zE0oqDLv^;uehEwIGY-fOHYCG57pbymXd_8k?Iti*9Lp|GzHD@<-FQ*EK