From f446580e9fe5b1f4be898ddb77587435465648d3 Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 29 Mar 2021 16:10:46 +0200 Subject: [PATCH] code refactoring (useless classes and wf removed), implementation of the test for the openorgs dedup --- dhp-workflows/dhp-dedup-openaire/pom.xml | 6 + .../dhp/oa/dedup/SparkCollectSimRels.java | 184 -------- .../oa/dedup/SparkCopyOpenorgsSimRels.java | 1 + .../oa/dedup/collectSimRels_parameters.json | 44 -- .../neworgs/oozie_app/config-default.xml | 18 - .../oa/dedup/neworgs/oozie_app/workflow.xml | 208 --------- .../orgsdedup/oozie_app/config-default.xml | 18 - .../oa/dedup/orgsdedup/oozie_app/workflow.xml | 240 ----------- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 78 +--- .../dhp/oa/dedup/SparkOpenorgsDedupTest.java | 408 ++++++++++++++++++ ...39-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz | Bin 0 -> 4844 bytes ...39-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz | Bin 0 -> 3428 bytes ...39-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz | Bin 0 -> 3191 bytes ...9f-4ae6-4db9-919d-85ddc0a60f92-c000.txt.gz | Bin 0 -> 683 bytes ...9f-4ae6-4db9-919d-85ddc0a60f92-c000.txt.gz | Bin 0 -> 1755 bytes .../graph/sql/queryOpenOrgsForOrgsDedup.sql | 4 +- .../queryOpenOrgsSimilarityForOrgsDedup.sql | 2 +- .../dhp/oa/graph/sql/queryOrganizations.sql | 2 +- 18 files changed, 424 insertions(+), 789 deletions(-) delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCollectSimRels.java delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/collectSimRels_parameters.json delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00000-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00001-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00002-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/relation/part-00000-94553c9f-4ae6-4db9-919d-85ddc0a60f92-c000.txt.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/relation/part-00003-94553c9f-4ae6-4db9-919d-85ddc0a60f92-c000.txt.gz diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 04e158542..52cc149a9 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -94,6 +94,12 @@ org.apache.httpcomponents httpclient + + com.h2database + h2 + 1.4.200 + test + diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCollectSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCollectSimRels.java deleted file mode 100644 index f9e6448b0..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCollectSimRels.java +++ /dev/null @@ -1,184 +0,0 @@ - -package eu.dnetlib.dhp.oa.dedup; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.sql.*; -import org.dom4j.DocumentException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.config.DedupConfig; -import scala.Tuple2; - -public class SparkCollectSimRels extends AbstractSparkAction { - - private static final Logger log = LoggerFactory.getLogger(SparkCollectSimRels.class); - - Dataset simGroupsDS; - Dataset groupsDS; - - public SparkCollectSimRels(ArgumentApplicationParser parser, SparkSession spark, Dataset simGroupsDS, - Dataset groupsDS) { - super(parser, spark); - this.simGroupsDS = simGroupsDS; - this.groupsDS = groupsDS; - } - - public static void main(String[] args) throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkBlockStats.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/collectSimRels_parameters.json"))); - parser.parseArgument(args); - - SparkConf conf = new SparkConf(); - - final String dbUrl = parser.get("postgresUrl"); - final String dbUser = parser.get("postgresUser"); - final String dbPassword = parser.get("postgresPassword"); - - SparkSession spark = getSparkSession(conf); - - DataFrameReader readOptions = spark - .read() - .format("jdbc") - .option("url", dbUrl) - .option("user", dbUser) - .option("password", dbPassword); - - new SparkCollectSimRels( - parser, - spark, - readOptions.option("dbtable", "similarity_groups").load(), - readOptions.option("dbtable", "groups").load()) - .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); - } - - @Override - void run(ISLookUpService isLookUpService) throws DocumentException, ISLookUpException, IOException { - - // read oozie parameters - final String isLookUpUrl = parser.get("isLookUpUrl"); - final String actionSetId = parser.get("actionSetId"); - final String workingPath = parser.get("workingPath"); - final int numPartitions = Optional - .ofNullable(parser.get("numPartitions")) - .map(Integer::valueOf) - .orElse(NUM_PARTITIONS); - final String dbUrl = parser.get("postgresUrl"); - final String dbUser = parser.get("postgresUser"); - - log.info("numPartitions: '{}'", numPartitions); - log.info("isLookUpUrl: '{}'", isLookUpUrl); - log.info("actionSetId: '{}'", actionSetId); - log.info("workingPath: '{}'", workingPath); - log.info("postgresUser: {}", dbUser); - log.info("postgresUrl: {}", dbUrl); - log.info("postgresPassword: xxx"); - - JavaPairRDD> similarityGroup = simGroupsDS - .toJavaRDD() - .mapToPair(r -> new Tuple2<>(r.getString(0), r.getString(1))) - .groupByKey() - .mapToPair( - i -> new Tuple2<>(i._1(), StreamSupport - .stream(i._2().spliterator(), false) - .collect(Collectors.toList()))); - - JavaPairRDD groupIds = groupsDS - .toJavaRDD() - .mapToPair(r -> new Tuple2<>(r.getString(0), r.getString(1))); - - JavaRDD, List>> groups = similarityGroup - .leftOuterJoin(groupIds) - .filter(g -> g._2()._2().isPresent()) - .map(g -> new Tuple2<>(new Tuple2<>(g._1(), g._2()._2().get()), g._2()._1())); - - JavaRDD relations = groups.flatMap(g -> { - String firstId = g._2().get(0); - List rels = new ArrayList<>(); - - for (String id : g._2()) { - if (!firstId.equals(id)) - rels.add(createSimRel(firstId, id, g._1()._2())); - } - - return rels.iterator(); - }); - - Dataset resultRelations = spark - .createDataset( - relations.filter(r -> r.getRelType().equals("resultResult")).rdd(), - Encoders.bean(Relation.class)) - .repartition(numPartitions); - - Dataset organizationRelations = spark - .createDataset( - relations.filter(r -> r.getRelType().equals("organizationOrganization")).rdd(), - Encoders.bean(Relation.class)) - .repartition(numPartitions); - - for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { - switch (dedupConf.getWf().getSubEntityValue()) { - case "organization": - savePostgresRelation(organizationRelations, workingPath, actionSetId, "organization"); - break; - default: - savePostgresRelation( - resultRelations, workingPath, actionSetId, dedupConf.getWf().getSubEntityValue()); - break; - } - } - - } - - private Relation createSimRel(String source, String target, String entity) { - final Relation r = new Relation(); - r.setSubRelType("dedupSimilarity"); - r.setRelClass("isSimilarTo"); - r.setDataInfo(new DataInfo()); - - switch (entity) { - case "result": - r.setSource("50|" + source); - r.setTarget("50|" + target); - r.setRelType("resultResult"); - break; - case "organization": - r.setSource("20|" + source); - r.setTarget("20|" + target); - r.setRelType("organizationOrganization"); - break; - default: - throw new IllegalArgumentException("unmanaged entity type: " + entity); - } - return r; - } - - private void savePostgresRelation(Dataset newRelations, String workingPath, String actionSetId, - String entityType) { - newRelations - .write() - .mode(SaveMode.Append) - .parquet(DedupUtility.createSimRelPath(workingPath, actionSetId, entityType)); - } - -} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java index 8cffacd7e..dbcd40289 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java @@ -9,6 +9,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.graphx.Edge; import org.apache.spark.rdd.RDD; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/collectSimRels_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/collectSimRels_parameters.json deleted file mode 100644 index da1011371..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/collectSimRels_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "paramName": "la", - "paramLongName": "isLookUpUrl", - "paramDescription": "address for the LookUp", - "paramRequired": true - }, - { - "paramName": "asi", - "paramLongName": "actionSetId", - "paramDescription": "action set identifier (name of the orchestrator)", - "paramRequired": true - }, - { - "paramName": "w", - "paramLongName": "workingPath", - "paramDescription": "path of the working directory", - "paramRequired": true - }, - { - "paramName": "np", - "paramLongName": "numPartitions", - "paramDescription": "number of partitions for the similarity relations intermediate phases", - "paramRequired": false - }, - { - "paramName": "purl", - "paramLongName": "postgresUrl", - "paramDescription": "the url of the postgres server", - "paramRequired": true - }, - { - "paramName": "pusr", - "paramLongName": "postgresUser", - "paramDescription": "the owner of the postgres database", - "paramRequired": true - }, - { - "paramName": "ppwd", - "paramLongName": "postgresPassword", - "paramDescription": "the password for the postgres user", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/config-default.xml deleted file mode 100644 index 2e0ed9aee..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/config-default.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/workflow.xml deleted file mode 100644 index 9bfdaaebd..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/neworgs/oozie_app/workflow.xml +++ /dev/null @@ -1,208 +0,0 @@ - - - - graphBasePath - the raw graph base path - - - isLookUpUrl - the address of the lookUp service - - - actionSetId - id of the actionSet - - - workingPath - path for the working directory - - - dedupGraphPath - path for the output graph - - - cutConnectedComponent - max number of elements in a connected component - - - dbUrl - the url of the database - - - dbUser - the user of the database - - - dbTable - the name of the table in the database - - - dbPwd - the passowrd of the user of the database - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - -pb - ${graphBasePath}/relation - ${workingPath}/${actionSetId}/organization_simrel - - - - - - - - yarn - cluster - Create Similarity Relations - eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphBasePath${graphBasePath} - --isLookUpUrl${isLookUpUrl} - --actionSetId${actionSetId} - --workingPath${workingPath} - --numPartitions8000 - - - - - - - - yarn - cluster - Create Merge Relations - eu.dnetlib.dhp.oa.dedup.SparkCreateMergeRels - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphBasePath${graphBasePath} - --workingPath${workingPath} - --isLookUpUrl${isLookUpUrl} - --actionSetId${actionSetId} - --cutConnectedComponent${cutConnectedComponent} - - - - - - - - yarn - cluster - Prepare New Organizations - eu.dnetlib.dhp.oa.dedup.SparkPrepareNewOrgs - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphBasePath${graphBasePath} - --workingPath${workingPath} - --isLookUpUrl${isLookUpUrl} - --actionSetId${actionSetId} - --dbUrl${dbUrl} - --dbTable${dbTable} - --dbUser${dbUser} - --dbPwd${dbPwd} - --numConnections20 - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/config-default.xml deleted file mode 100644 index 2e0ed9aee..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/config-default.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/workflow.xml deleted file mode 100644 index e7c95ee8d..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/orgsdedup/oozie_app/workflow.xml +++ /dev/null @@ -1,240 +0,0 @@ - - - - graphBasePath - the raw graph base path - - - isLookUpUrl - the address of the lookUp service - - - actionSetId - id of the actionSet - - - workingPath - path for the working directory - - - dedupGraphPath - path for the output graph - - - cutConnectedComponent - max number of elements in a connected component - - - dbUrl - the url of the database - - - dbUser - the user of the database - - - dbTable - the name of the table in the database - - - dbPwd - the passowrd of the user of the database - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - -pb - /tmp/graph_openorgs_and_corda/relation - ${workingPath}/${actionSetId}/organization_simrel - - - - - - - - yarn - cluster - Create Similarity Relations - eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphBasePath${graphBasePath} - --isLookUpUrl${isLookUpUrl} - --actionSetId${actionSetId} - --workingPath${workingPath} - --numPartitions8000 - - - - - - - - yarn - cluster - Create Merge Relations - eu.dnetlib.dhp.oa.dedup.SparkCreateMergeRels - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphBasePath${graphBasePath} - --workingPath${workingPath} - --isLookUpUrl${isLookUpUrl} - --actionSetId${actionSetId} - --cutConnectedComponent${cutConnectedComponent} - - - - - - - - yarn - cluster - Prepare Organization Relations - eu.dnetlib.dhp.oa.dedup.SparkPrepareOrgRels - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphBasePath${graphBasePath} - --workingPath${workingPath} - --isLookUpUrl${isLookUpUrl} - --actionSetId${actionSetId} - --dbUrl${dbUrl} - --dbTable${dbTable} - --dbUser${dbUser} - --dbPwd${dbPwd} - --numConnections20 - - - - - - - - yarn - cluster - Prepare New Organizations - eu.dnetlib.dhp.oa.dedup.SparkPrepareNewOrgs - dhp-dedup-openaire-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --graphBasePath${graphBasePath} - --workingPath${workingPath} - --isLookUpUrl${isLookUpUrl} - --actionSetId${actionSetId} - --apiUrl${apiUrl} - --dbUrl${dbUrl} - --dbTable${dbTable} - --dbUser${dbUser} - --dbPwd${dbPwd} - --numConnections20 - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 33da45feb..851e72dee 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -204,76 +204,8 @@ public class SparkDedupTest implements Serializable { assertEquals(6750, orp_simrel); } - @Disabled @Test @Order(2) - public void collectSimRelsTest() throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCollectSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/collectSimRels_parameters.json"))); - parser - .parseArgument( - new String[] { - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50", - "-purl", "jdbc:postgresql://localhost:5432/dnet_dedup", - "-pusr", "postgres_user", - "-ppwd", "" - }); - - new SparkCollectSimRels( - parser, - spark, - spark.read().load(testDedupAssertionsBasePath + "/similarity_groups"), - spark.read().load(testDedupAssertionsBasePath + "/groups")) - .run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_simrel") - .count(); - - long pubs_simrel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_simrel") - .count(); - - long sw_simrel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_simrel") - .count(); - - long ds_simrel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_simrel") - .count(); - - long orp_simrel = spark - .read() - .json(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_simrel") - .count(); - -// System.out.println("orgs_simrel = " + orgs_simrel); -// System.out.println("pubs_simrel = " + pubs_simrel); -// System.out.println("sw_simrel = " + sw_simrel); -// System.out.println("ds_simrel = " + ds_simrel); -// System.out.println("orp_simrel = " + orp_simrel); - - assertEquals(3672, orgs_simrel); - assertEquals(10459, pubs_simrel); - assertEquals(3767, sw_simrel); - assertEquals(3865, ds_simrel); - assertEquals(10173, orp_simrel); - - } - - @Test - @Order(3) public void cutMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -369,7 +301,7 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(4) + @Order(3) public void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -424,7 +356,7 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(5) + @Order(4) public void createDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -471,7 +403,7 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(6) + @Order(5) public void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -587,7 +519,7 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(7) + @Order(6) public void propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -637,7 +569,7 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(8) + @Order(7) public void testRelations() throws Exception { testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java new file mode 100644 index 000000000..f33eca57f --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -0,0 +1,408 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import static java.nio.file.Files.createTempDirectory; + +import static org.apache.spark.sql.functions.count; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.net.URISyntaxException; +import java.nio.file.Paths; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.util.CollectionsUtils; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.platform.commons.util.StringUtils; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.DHPUtils; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; + +@ExtendWith(MockitoExtension.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class SparkOpenorgsDedupTest implements Serializable { + + private static String dbUrl = "jdbc:h2:mem:openorgs_test;DB_CLOSE_DELAY=-1;DATABASE_TO_UPPER=false"; + private static String dbUser = "sa"; + private static String dbTable = "tmp_dedup_events"; + private static String dbPwd = ""; + + @Mock(serializable = true) + ISLookUpService isLookUpService; + + protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + private static SparkSession spark; + private static JavaSparkContext jsc; + + private static String testGraphBasePath; + private static String testOutputBasePath; + private static String testDedupGraphBasePath; + private static final String testActionSetId = "test-orchestrator-openorgs"; + + @BeforeAll + public static void cleanUp() throws IOException, URISyntaxException { + + testGraphBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs_dedup").toURI()) + .toFile() + .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "200"); + spark = SparkSession + .builder() + .appName(SparkDedupTest.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + + jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_openorgs.xml"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + } + + @Test + @Order(1) + public void createSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50" + }); + + new SparkCreateSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + assertEquals(288, orgs_simrel); + } + + @Test + @Order(2) + public void copyOpenorgsSimRels() throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCopyOpenorgsSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/copyOpenorgsMergeRels_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-w", testOutputBasePath, + "-la", "lookupurl", + "-np", "50" + }); + + new SparkCopyOpenorgsSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + assertEquals(324, orgs_simrel); + } + + @Test + @Order(3) + public void createMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + assertEquals(132, orgs_mergerel); + + // verify that a DiffRel is in the mergerels (to be sure that the job supposed to remove them has something to + // do) + List diffRels = jsc + .textFile(DedupUtility.createEntityPath(testGraphBasePath, "relation")) + .map(s -> OBJECT_MAPPER.readValue(s, Relation.class)) + .filter(r -> r.getRelClass().equals("isDifferentFrom")) + .map(r -> r.getTarget()) + .collect(); + assertEquals(18, diffRels.size()); + + List mergeRels = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .map(r -> r.getTarget()) + .collect(); + assertFalse(Collections.disjoint(mergeRels, diffRels)); + + } + + @Test + @Order(4) + public void prepareOrgRelsTest() throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-du", + dbUrl, + "-dusr", + dbUser, + "-t", + dbTable, + "-dpwd", + dbPwd + }); + + new SparkPrepareOrgRels(parser, spark).run(isLookUpService); + + final Properties connectionProperties = new Properties(); + connectionProperties.put("user", dbUser); + connectionProperties.put("password", dbPwd); + + Connection connection = DriverManager.getConnection(dbUrl, connectionProperties); + + ResultSet resultSet = connection + .prepareStatement("SELECT COUNT(*) as total_rels FROM " + dbTable) + .executeQuery(); + if (resultSet.next()) { + int total_rels = resultSet.getInt("total_rels"); + assertEquals(32, total_rels); + } else + fail("No result in the sql DB"); + resultSet.close(); + + // verify the number of organizations with duplicates + ResultSet resultSet2 = connection + .prepareStatement("SELECT COUNT(DISTINCT(local_id)) as total_orgs FROM " + dbTable) + .executeQuery(); + if (resultSet2.next()) { + int total_orgs = resultSet2.getInt("total_orgs"); + assertEquals(6, total_orgs); + } else + fail("No result in the sql DB"); + resultSet2.close(); + + // verify that no DiffRel is in the DB + List diffRels = jsc + .textFile(DedupUtility.createEntityPath(testGraphBasePath, "relation")) + .map(s -> OBJECT_MAPPER.readValue(s, Relation.class)) + .filter(r -> r.getRelClass().equals("isDifferentFrom")) + .map(r -> r.getSource() + "@@@" + r.getTarget()) + .collect(); + + List dbRels = new ArrayList<>(); + ResultSet resultSet3 = connection + .prepareStatement("SELECT local_id, oa_original_id FROM " + dbTable) + .executeQuery(); + while (resultSet3.next()) { + String source = OafMapperUtils.createOpenaireId("organization", resultSet3.getString("local_id"), true); + String target = OafMapperUtils + .createOpenaireId("organization", resultSet3.getString("oa_original_id"), true); + dbRels.add(source + "@@@" + target); + } + resultSet3.close(); + assertTrue(Collections.disjoint(dbRels, diffRels)); + + connection.close(); + } + + @Test + @Order(5) + public void prepareNewOrgsTest() throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/prepareNewOrgs_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-du", + dbUrl, + "-dusr", + dbUser, + "-t", + dbTable, + "-dpwd", + dbPwd + }); + + new SparkPrepareNewOrgs(parser, spark).run(isLookUpService); + + final Properties connectionProperties = new Properties(); + connectionProperties.put("user", dbUser); + connectionProperties.put("password", dbPwd); + + long orgs_in_diffrel = jsc + .textFile(DedupUtility.createEntityPath(testGraphBasePath, "relation")) + .map(s -> OBJECT_MAPPER.readValue(s, Relation.class)) + .filter(r -> r.getRelClass().equals("isDifferentFrom")) + .map(r -> r.getTarget()) + .distinct() + .count(); + + Connection connection = DriverManager.getConnection(dbUrl, connectionProperties); + + jsc + .textFile(DedupUtility.createEntityPath(testGraphBasePath, "relation")) + .map(s -> OBJECT_MAPPER.readValue(s, Relation.class)) + .filter(r -> r.getRelClass().equals("isDifferentFrom")) + .map(r -> r.getTarget()) + .distinct() + .foreach(s -> System.out.println("difforgs = " + s)); + ResultSet resultSet0 = connection + .prepareStatement("SELECT oa_original_id FROM " + dbTable + " WHERE local_id = ''") + .executeQuery(); + while (resultSet0.next()) + System.out + .println( + "dborgs = " + OafMapperUtils.createOpenaireId(20, resultSet0.getString("oa_original_id"), true)); + resultSet0.close(); + + ResultSet resultSet = connection + .prepareStatement("SELECT COUNT(*) as total_new_orgs FROM " + dbTable + " WHERE local_id = ''") + .executeQuery(); + if (resultSet.next()) { + int total_new_orgs = resultSet.getInt("total_new_orgs"); + assertEquals(orgs_in_diffrel + 1, total_new_orgs); + } else + fail("No result in the sql DB"); + resultSet.close(); + } + + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00000-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00000-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..ba58d823c16070b42dad4d612051d2996f74cc27 GIT binary patch literal 4844 zcmb7`_dlC$*v9*4DLRZ2luA*eX0=7AO^jMqYSyk=HA}QcJ+>rjG%dAfY0V<`DjIt$ zD#VH%p;l1Adwbr``v<)F>B@2Cll#2x^E$uB0Y=eY`0sO(ls@IosSz5vb#V7)WA#^A zun&Z9QmIRjyLW&FZ6on@z3_wVyt4BL%6Yh)56m_leo)aruz1pq&L5(jy6|j7`8Drqq&O=1On7s zmos?wWMhM|)U+^cMNPQm#dk@b(E1-TpFU`e2K)+`r>XpnKhW~)gplU*{`Tpn`=NhZ zvegG~z8|Trd606AM*Qq(hhOm^Wf=8xQ*2vlm4)s z;PnXqZEE|4_X3UL#?N6)az^#`_hg-^(M=wZG`P;Tk#tL7-+-$FOVcK(=Qp$2zG(** z@NnN#(F{TQkrS zMJZo->666ei{lVOKNgE{`mzDHo0f{|@47~=zkFDFu!--+GOXlH@cYTls!Kn$T9I!l zTtAIQYW5ep|J!GH=(S9v>tc5-w$$WkO|qP?l}_n^FD?&`UMqAol%_AeHfDma8ngO* z!&0vAnn>!O^gbY2ZCsYHoScJzhhS{tswKUk6ze{4&n?H+lJ*U>sLut1lZ3QDwgt0q6nw{S{Er*wF?0i8n;UO`b z3sY)b1tzmEL4#SxjD208VGE;uD{DnJywisc(h%M!_0Z7og!*W!K8mYh`F_FCLEObp6mS2=n z+>>y;Sm^qfMJiQq-RlR`^2V^g9A`h6oPKgjP8B}(U3=0#R$?uuRJq7LGv#XCOo__m z8Smxb1oMp}ynB+n_-+XNH5c@-6IBz9$!V=Sl2|-I`DW8u@Q*uV>(jppvI*-j{`BX_ zB~>_OnbQej=|?YAW!Sr?vE!Q_6u*(=t>JndV2F64BQ|*bQ?%Tug%R&12OV6qB{x`z zsgByOgqcU?@o^rGUJjzGj%9T zpO;};DJDa$S!Wonq|Y0iBTc?T4)$|sA>)7eHo!-{)_zd#`7us5c&;&Bxby@rR|IA)unq#VjRQtDNKv>&ddFZ=BN@=dQ?pE_N^hjZUk{o{y>bu_gy-TEDQ~ljf+c>TFDME)xzlGj}PszH^)CuC)vA~ zK@PNF_v*@O_r$nLZ-=J7Gv@|#xx2So`qidHqwOR1V^N4dcW)Rog;B?8cZ*#)J6)Qv zRygAtJsx+$iR*3!Se6yNBgRgzGJaHCqcqQN6W4++FkmzIZ3W?i>GX z`7y8=;)drk1!4CX+|hv(ee}M{q0LaVVa5Lu~2V6 zeZl2ks}83C$29_s^0X}e`je`+>W<$e;MiWK{b(*j3tG5t1nGEbSh;KkG$q+I@GkAD zJkt;nt9FPvzQiZeiL2v(*{OCQ^O?6^C8sxsV|%k31@3IMB~ry&PQwhT!naEJ=NVQC zqTC@L%*^2QBJkJKOBHY_S~1NRk2RU=x83>llNAwc0#`2=$#wNxbp?rxWzT z;UCn_=OV}k`MSD|vlX-Q8J8MCTWp?i3n%(Ezh+fS@FhngkKo;EVlShQA}P9Pi6;k= zU=pr>w?L=F^cY+!U$mN753h$Eg2tMjm7b8?6n<7&*Y6eHoVj(?vTD|?hNLh&pz^@o zv0Y<~)0Wa}T{vVEw^A^0OD0hNG;efW<30Ew_3l?ykfRoR1$#y9)x3CYJeE2%LSz6q zr7CEpmj#fa!8xYYrx!0rN^r{EQ|M#$t9gW*Gzc|Za?X0H_Dq=fn+=9+Y-sz+TtxF- zrEqiYF;>Q#ZsmorjJmI84}?>bw|Kk<##&W~j#e*A^AZ>0l`N`f!q`#oUHj0@DbF=) zHeuzdZoC?w^I|7Vi&UipQZI!Ls@dK@YR#*O@BVm04Xi)NTaX67G8Q`hhd{I7{fx5wpoxLC<;X~}3OVbO zBVBmt(Nb4oopfFDM~7le`L=|Vl?9&q6E8yDo8yLqh}YmHlaM-daYY2hfm8gUtwoH4$`T(No zwu_p(%rZG&Bl|bfkS$9i!OD zhCq6=O=V?oj%ImFz;}x#;@`~9zfKP`-vlEGTtuSb1s{bfutGlmM!kVNbM3;g{X$SH z?LY)QBHLkGDU#0|w6qeL2urTZb};bv2Of8m%N&H&h)#srPC5)%ydj*Vo+EsolIk); z`$Ho3k!d+q`~(#T3CZC+P@h>oC@7)w3G^q<{1aHxooOX&qd09QE4IONV|=V~4!Gb7+F5%K4|0OVd4KnAP@ z+9FBQ;qRtqGwxy=k!{5RZh96g9Du@2M4ME5K=OAyRl{50SlOrAJ2CZUnE% zm}XJo4RbRvaMOiRn|Uc%_U((h@e9dDL9x#uU2?MT<|3H?}rPnkl?JboZdfwW# zf~9Tcon_FpZ+8&_Dq{?7_myB$aA*^<^F$J#R`jY21

%+$z#WMe@wIZm@-y!Nfp%7X ztwNi|sDa?y?M$*Ql0k=a>gRf%Dcv?@oI0 zKUaI6>{5qCr9`EOGYkEa`xR*5lf9h1EPWGr$^8JXT)15nf^(ki0+pETN|iIZihWA# znP|ORR>Iu+^C#xUhB5Kf+%00JLQ0FNcyw2b+(9pphBOKwCX<_&x8#c|3Pq%a)o%a# zZ%iwN#5v0u&H7g!>T~DFrcd5)6PK8(_rGtwsZ|nmxgx+;gjuNB*DHn6sQ)jSi}fi7oWI zrQS?D`>U(F)bw9Bc5)xr%T!rZ3x>lJ$92q9WVWZDf?UMuwx#ar`6{6nLau6H1f_Uo zF@leEu8AUf?Nw1ZhqS{vg3w18K_pTv7lbt%4O%OFIYxkHJUZpL?pE9Y`=`y7Ck8}q z&XW3=U6dXp2t=pzZ$NZH{TH1;Fd#bRD4~G+?POr{qPPxb5TM=!7x5IO<7<=}c%A>= zUe>&K#nG+S0hN=OKpc910mknzPG2TtScQHJk3)Y4VO2R|oeW;7`8XMT{Cv}#1=bM> zf^9S9zzjU$P6jNjmLO5&ZcE15;L+^h8G+)_a@uYmoqtnUvtp~UksmassN7HhLfk^C&k<6sH1)p-IcMGZ7Jv}9kc9H( zYU&A$-tbq=MkAh|Da-^Hbm+C`zX~_;`EItgi56`U?kI`7s-Xq3u>Fs2n0JmxY)hl{z~A1P#=%oaccNN=k$<<73b78XLT&XEN6B*;MYWml zZz^s|8=3}~9?Q|S?Mzf|W2hVPX_F;kO&41CKw zPV~THCbo}Ft4Xboy4h&Pa3D@7p8*I3vx)s}=F&p`U&wE6PwlCa}ho2EkSi9tEYn z1j&ThLf=AFIp1@bp7Are|j^!c2vC- zz#hpgc&sfZ-{~-L<=++X%zfsK^SyZc^BlVblin#Xi_oJH!!WeCj6C#+*B)3Xs6dY@ za(er^&xkC|hhp#L4%<4}GA;zl1qLQOnJ3H>P&rq?4B(dG+7NaH1?&7j+kdu?ZbZa& z{xB_zW4Pl*aDAqMb!}%k5J@)G<|StF?>9{iCg>aa51+8b2&l+H(&^PX2u@j2)Q5+c z?{m!)wDqqkG%xPW5l}I};*fTp(2Eok2XEy+i@stnCmSKre1(sp&2wi7ask2svA$z; ze&n3h^Ect~(EWBKdNAp3np|4osoMOa#JzFiVy^eYJ4>o_M$kgF59VXTH?!ZqNIP&R zEwVn4<52d^>q)px<%lrP)g^@%H-(_jyP*ekV{Q@Xh91z3xkaEGdO$aJymwkCQ`79n zSP}O?<>o7p|5p{nEYWPMWFw6aF>wgLOswhI=rM}kIsk_lCg3BU7lSB#^u zL-+77Xc7gWkLS*p0vd?5x4-8OtOHmChI{Tfz><>CiX|O%SBhq{KU}t9KU%o)Z$eZ8 pU_LQ&F@X8lF4``hn@@WeU_R>H>VWxV_H~_sn>bb^Br2&c{12`hrJn!* literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00001-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00001-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..137790bdec7f86831a611bc4dec1e7cc8e42fa41 GIT binary patch literal 3428 zcmXAqdpuMBAIFVKt_iv461f|5DRWs!B)5q~=CYER>zGT*gvlk0N+!3V<`QY;Hsnq; z*G$PZmyo5ozwm=oX_L&dcI%pr&RJumVX!9@2NeT2`9(Dsn%=jbR)$-4M-e>Z1uwUfN!dSWTntPe|H_2y4tamxBkh)>Bpw_mM$>V|9I0pq*h z`uS&hfyF;H?20PN_WGH8sJB3NSs*y zf)53ORm#U|%P9@b8uOfhFAZVhZ|wCz489D^)8`$YS|Rx^P6 zPl|>`^{Z;(r9Ty88-dA5Ja?O6F{97t?~!P-Qwrp8^kIc2RZqP<$@w9-8D_*l{;$Gx zi__VD%gkaYLa!G`ml#}d=}>#iJ;P;>Hpe9uh^Oz4Z^E^K@7w(7$*Ko+dt2^-he02x z*yB4Z$3<_x676Pd*S49B(8Z-y3R6Q*`}v(+{IVV9NU^IA(<1*uz9+W82*tl5H3S~| z$Y)v%&S|=*NES7ljv&FQjD;@lr_Bz-$|G$KKuNN>*DctKoT=yrbvs72NZ!Z1KWKKi zyS5+5HL6@?{*=Wp--}U%#%A>cBgf11z}dNSnRE5rSomZUk7pc?-K80RI;5ZymOiRa}PR;TRB1_3Jp%>7KcJ$ zNl(t3pgxi7^hWE=&2*DZNo|}GaYBR`>Qu@1*!~4lUqU0?;I-A?r9)kzf`=0NLRpE-=alL-7F%~6RaO+HqTF{lbe_y zi{BbsPQ7hF-lE%ZR+d;oZeJG@o+rA~iTpf^o{ zmjNY~%`0@ek9M_2-jBCA)q*cKoV4qg=l8 zBVl&@0WS{*G^y6T;O!Hiuqs)1J~sQJNy9W!W_|n$1;bOf%aBDFo!rEA5!=b+?0+YDf)MpKzZlEtl@#>#vM)w?Kn zqSdla>FCw&75hkG<+VnkUDvflv0?|hVBLUH8~QT(E?Mp)?H7aau=0Co@~Y7)|FC{3 z?>)RHyj8!ok;&r&+Xr8`Q)}Z9kJiSJ9M);TG|^pfjrVIuZ*F9F5?|&W${*Od*H?S6 zoiNM!H*IZ@*zmgdH@MUKzIE-G z6*sO(wH~OQTZOnhv()Y9eFf}qmkyN<{e6}5I{Wp}c9L``@<)89bm;Ph<7I4RmMuTA z*TXk^n}Vi#*gRZE@tva{d7%0*C$M~~XPl+-s~>&Av=7B#%y$3R=9EyjJN-z@=k)t4 zD5r7N!7mpL!Uv3h8vPtr2EN2r;G+fG38dN(a_5e!&hOD+HAklf=AcJx67!p4;g){X1$eTb+<#Yw* zw2Fqzpe=+?5ZDO5=GWK2YxP3$#|f6QOcVqrifxH)*zT3KB@c@a8H3>LJ2F}JS1B+N zwQAmJB^gpE_e3|ucMeWy1ml6HU%JTFhnu7NcF@N*LuO%T!=*PDGtd_RGskAEYBg9C zSP=S8Sy8O0S`Aeegjl@{hy~GL4bTPXQq?(IIV|Uh(E|3Sek4bYrQ=r|p^G3Hfc=$3 zDy^(F0<8LHHb5jzdA?)KGd<1^var(8ThUjQ6Vm}e$|~MezcGNTO__M@)5ELE90U)r zM?G^8GTAbjgWyqrGzcE`%t7b{^b*}SjWhK{C&fpl9fY8jdt7tAP@CZ2k31yVWvYvX zW+zpwte_deVjGCU$)E@~ZuXCLybIYeh@csi?)c9a!BjCQFBCZ29vvJV%p|VJuE-=F z9ehMQI+#g3gDu0dMX^9aH4$$N?!>+~Lz-$U82^Ze<6Y$B1l(NC=06H<5fboSxgv6~ zi*AI{y^emYNO9GX4-W+hlUON4XA$NRm1ivg3oHw_M1-Kg)X8)aC~&Iy=*(qd4BtZ3 z{?0V{Ybn5~l4?0uKJff9PQ_aE6PnH%7DG#S`E-S^U3&f7@rpXW(Y~K3p5@_$`kX>U z|4`MJq1l3Ji|j__B@eU*T1ogs4qFbBISEWUVxIGlc@C5LH;Hdd<^nZfE2ZZ`69!Hx zSvmvrd>QFtTpbyjzb@QNwP`=;Nk8LRDb{7jozOd2%j3wWqUzOV|IAkwT;tf#no-z! zX>F`FJU?n1*J*74m_6Pe}x^(_jbQdRdwwbIF*tf{Y5SxE^3uoqbC z!R#Dey14mx`Iw!GMn$8T37=#?d6Y2fU&1J6!f|YIN1ZwuzD=Jd^CGd8l}8 zc=E$c?2j1Hl%yL^BbMXrY=S9owCR7+qR4w8=O6LwhT9y&feSggR-en8$*8^KD08Pa zXys?ECH=Ajrl;=~mDY2j(pz3dKap)MIq*!*hQY|fe;WpaWNnxS+opMWK59#kKzw=ipY!Hk9P`Uz6sTE-{#E&Lb-is-v}asii~J>wxd-WL2NS?rC#5@V)8W^ws4|w zCWR08;Eng%^=q$0>bCzR>#agu>6E6X{i&vAIftz=@pF6G^r{1KVA$3xU5`B<1`?4p z3z2tcEna+TJ8b@aM)lo-c!kc*$ox!1as8qe@N-vaf%KefLUHBbvE1$v9&cLIb=@ar z*rAbql7%rfJXf5*PB^>ayP%CHja56eNzq84B0cb!wIt|#jmgs>Yr!JZd-6PJ2g}>* zY=nvk#&Wjg$>#2@-AJi22-S+AGbVs(V|<4OMKjJAUOk&lcMB(P6$+F1jI+k|1*EN{ zt(e7VKh{3Nl#SIt*;p})0cV9@Yw<3UI?2(K(RXe~%#%Mx!~BX(&;WOpph}SL z2Cm~yPxV6-$(FITYr-{+NL<}K11`Wcr!me{cx( zb{sqg6@c0^cef5er$$2+YJar5q4o{dG1<}C!|J=zI%oHT17R0iWY}~t50iiRmRg8D zswcIB6p)gK#b43H^gEI&X}@Jhs%^tLo@Q<72(x2q6YjdNO7t86g}3{2rt&b?OdAIf zu?#!*JG6GIXiyA@*+LNk5qqwVSkO@mu^?s(Q#ewT_KUt<8 literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00002-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/organization/part-00002-5248a339-09c4-4aa5-83fe-4cc5405607ad-c000.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..6b090b9f57974446bdef603e7cd1c3b14256b36e GIT binary patch literal 3191 zcmV--42bg|iwFP!000001MS`0Zrerz#_{`o3PM-AK;&|U9CF5YN@KLzwd2B)w`hum zF-Myli8LfRUMJ1F?7QrX?Z{`RcI?oOTnMgoE|`^~lr-bq5s}G4a%*x;IrqJ{gTZ|LgWGpE_0jp_*1|LWY0=TBg2 zJKMZDhu--)Os|i*{H_li_4)Iwj(_gi$*~V-XVutFXL5Yp*(3!i)3nH#|1a~u`S~-^ z^bdXH&%jRP#i;S3@W+ut5@D?>Q>K_KjMJ=O(h@5DNi>t{u&U*#@4vzCox2ZDjnRcz)HRc_ zyzdzMAAkSF?3jPcMpn1Saz2H84SVRv|H${hD0l1DiyJ;&&&2(@`d}y1YWBbXCZ2jTmK{vOZ=QsH7Rr&I7_}d^5B9A;dU`QLr&jsC_GB`;9*O3#x%H*h=R>0!b9Q@Z z&hud;o;4DeNss;1zBqeCT}0Hnsr`Mfnjz_;l1}W@%8B+b)`zmLKFX`#2I?-N?ooAk zA9am?V|07&`akE+@AGfMrpUeY+(^8r#CD$ecv4L*nl7s8bZmXd zYX0{cgQ^RA7j`n6>c(K6E~uz(PU`u$y>s6E;TPxCpUKgEPkr&|%HIE!H}`)2{FhHp z$0U`dM(R?@IVcd98Chzo6ye4QE{MC0NxOS*dPz6=ad$j!>SXWo4Q2ACHFMc)9F1jnIv3t?wW0lMH0nQVX0Kg1V=w%maQXyj+8W@DKj}Jw zGyb}30?zou1_hkoac(4DE^y-Q!V-KFQCq~Du@?nzVz#V+H~rvgrI?dONNs2#ZC1+6 zX--o|tSKMm%}ivJ<}@(g+#l}Xb!Tz9x&OBR&#k>df7nlkTmN{wyS@8UKiTPR_kJ7n z!N@D6&Xd`AV|<3@O9W254}@<9)%0H9MA0SkCgLtfH_;Wi+4|f_+}_oj@i$$^>dpA8 zf;U&dn>)XKab7!eB>kyL2u5gwt z%X3PKz=ZR!`@@~T;l*d^#<|x^kQ&O_rshyiC}#_sLOG$FEs6`}gmT858;PNu;WmYG zLOH`P3gyIvS&ecMBc0CDLONnnt4t=TESP^t&NE+wKc<|b%!)GSVO*yElutf9OB$EF ztHw&IQ@lK_7Uw>0NWJm#6xG3VAqED|;Ueo+B*jJ60J`>d1<-|&8;JpQ;Wh=(0qDXn zdN0tu`}K?Sy2T6ty6wDP{$q7j{_a!Wl@F!baz(G z?eqqC^;zIL#4F*flB@qQkPPHRp33*gyi(?@tFfG786uoB(7Mm0lo2xo|a zA)L6-x(!8fq4j!v6MrQHaJD};5(7BHZ3^H7aE4zL+vQ-ktUxy|{CXF!Jd0a4*W2FdB|E+0)?P9g?!W2p?(OgRAMfj}OM1zh ze)4+0b;(<7%e*Y6d+1_e)nFwZR?-C;80LwUbZsb#m2|jLxOH6to?+xhVl1Q!w<*LE z;u(I?5XDn6W|bgW&WkcPoMl81m-~NBnVGcY-z=W~fS;R5_0bW@)&?--7I}E zfX=;cS+7GePo=N==b%3rZte7w!QM97uF^Q=Xe_%^e@1p(ZD{|Jjrz;8&ZWMGiM;Sy zurvCmt6$(5ebTiq@Ql9hx)yjwA2uxS{C;yIaf`h(`lb(gC*Fj$i8~`N3ho4VuAV!e zyBW+CcYecWFstOwx5Y0!Ja#5|J% z+8ZYF%t7uA6M1uhC&06XO;>(Jd-O%uc}08lak0b`-WhdnB!+i}+Z5gj?+m{v)_4Lt zSC5^~-74k=JD*`4MOln%u_AxQfrI5Fe ziM)HD+s8y+9gFQ?pe<~Q#dcV1*P^(%wio~!b8aLCfQH)?015yNzi1c$Y9{lKO}pA2 zSy{_6DKla;)k?99Ym#cJ3g#r^G`|I)Z+3fs@Af9M%Kv?5x-!@lG4Cn#ZLnz?`EPmI ze{+-YlnY7=UAi>Qb4#5mtl*A2RY+N8e}c`s4MSffpYOzT{X(ZUi4m++Z%jwUIRN(OZ~V%s9ZR+;w37_bm3&F4N)qUDbEX< z5#>ixa9U>E@JCecW>1TQtIwD`_(dMc`YyKnQ2%M-DrML zNC%{AVOStt7`c%cNEdEXARUk{{G!3^$^oVeXI{L7=_oTq5Js&fT$)T$R|+RY&ZY8S zn?HX5-Q!Gmwe@WHPPg1ma-*Je;p{~mduB*oC?f%>gVePnDWnck*TS%nx-fDhF{Ccs zrjR;FUHC;Ib(j|ml{&(h5P~?X^PFm9bna+oOPg{-(n3FWnd>d78`}4EGinZR%r0E3 zyFS!c<|S*ji9f-^+{r;EnetQ z7&I5pNPy-*bL~h9ngh+XFf3>;jNC{JnhUonXbvVNTJaU}esX5>7ai z%0jEOu*#}}*g_Ofhq=bJ0OlG8Hd-#sp)g=BmXQF=0p{9~6fg&vYhhTxTo}2L7%&%Z zQ@|WxF8rc^In0U$!kkrvE1B`!_^~RSB$m2N<&>GykD?OKg1J`*cW-~&e5eiv(zF8X zUL|*LpPzg(KY7f|B|{#{1NfpG2>>5}uN_GNd;q={h6V72ksFBteBm|)@B#S3FACtp ztXLrMQI}H9iLfM7RFX_ESrDg5ZcAQh$G#2t=ATwp2m8@=7n*c=j+kf5;yMI|%0(~| zP&uevJCZ`>pmHq?3zZ8aHxfhT!fgtbgUW?p^j?+g55G9C{!EVUdupIue3RlOD3|6$ zD#Nu9snaa;UkGJrQRXtWw4~pLa;;chhqYYGW;qmw%SAB~a5=bKJCefX;BqYt3zrKc dHxk3;!fgtdgUf|qbakuc{tx*sV!*r10RVe}T$%s? literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/relation/part-00000-94553c9f-4ae6-4db9-919d-85ddc0a60f92-c000.txt.gz b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/openorgs_dedup/relation/part-00000-94553c9f-4ae6-4db9-919d-85ddc0a60f92-c000.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..080665d22d16b6550043d98dfab96209683890c0 GIT binary patch literal 683 zcmV;c0#yAUiwFP!000001MQYei`y^|$M5|V#ph7;e)v|}OHT{!-j-z4%to9cR8%FFQWqhb# z+Y+u=^uckWl~yuXPl%upIHyu_G|j4cf^LgddsBrsd0k$=hY1`Mtb3s@KlE{E+J4)0 zhx?x``zG~IsU+7`b%d_qs#+rEjNxJk*kLWx?T$Ox`)A)NZ_8Bkk~h|p=B%AP#ymXL z9*(|E-FW^7Kc%6cQ~M4|Ni|PXb|r1bAy2K(Ln&XNdu*|v(zMICPG!ow8sg~Q!v_CU zQ2b}y;4wkyacZAyczdj&Tzt+^f8ljFteTp8+K%<$a7r626Kuw&rHX1o6qlMBxoR?Y zx4Ur;G~_k(=_}0oo1fNm!mayw@e3HXV{=r#o4c!~R6eCm>LA|^v-hyg$0KAXC~Dwx zf`UqF83rww(G>^^8Vr|FMTh~dlvO-IUgLBU`ymH-IDJ(dAbL&|L>dUXz=461QWIf# zy`eKvX>Ru!gR`Z!QQ^W9O~;EJF;59=@pt!!SNnzQh3kLE^(+npR zV1wX-AX7pL9}h3jkN8Ux`VjEUYz=e)<*g zpKtxge63_f3I$NgoY5j^ufTXMxpW$QT)1Aiz7DQm&eu#S3WOuCb2U{wY9>$Jr4Wh@gKZ>x7OWhb9y-Fo6Yfbc(7GmitqSV|J#7$c&oR1*?{cyw0_!$KVJ6X z__@rdwf=`vA3wfY*|tyjclO2UX>+T`6W`vg){voukRuhS^lEj`?e`CN=N0V_Z+N%) zC!gbAfARXf!TbK#&*enr{%&=N{L}g9Y|-ZUx6SQl%ZKmx=hFN8!{sZ)M-;Z=<0Fbr zgPWw7f?`gTH0Gk%Le&{-!D+!KKD^P>WANA?gl}%%V!*IoW6r=KYQ&0=<1Co81WJA4 zd9Cs&YI8o^CkCIE*kKqhebjV0v3GhnZPa%DhaW%w?t{B_*UxdCV=x#(P1RiVPF;{? z8rLo(l%KhlJ^!7J-`MJLy{3^Tmm$!5;80V>2%R~$iX~R5?%G{1!}U4XY#$zj6IqSj z2_nr|LaeA$3s5U8ok6?1cGoZDI%LjVH7cg6eWcv0j+92K8H71~)pboZlASS2Kt`e3 zga}d<>ZHx0!CkxS7jg|c8m10~TJMm>@(G+MEH&5G5x$y-@BrzGW7gV`dQZg7D)0;# z6auNccGpXE-7O8Fl1O@+ZCY#|J!I`_v^BTvuHE%AT+Ad)Vn9RO za~fjpPiuoF0Yn9BLgHxNAL6cG%5@@`CFe0`#1RUWI%bqMi?szcQ2c8DH3K(JaY`J; z+Kq;ZB_YzNGKMyG*Y0{zu0Q1=?%G}dMqU5Pe!gqhvlWg6L3=RDWRW#$N79C63~|@) z`o;2Vf`%$3VJ!v%xNwa$^W-Voh<+izJ_p;rzg@3$ijX@9P|`edAV>(Kw*slQnS#4^ z*UPA{uU*?&D2Y=*ZYB3JN`RC)XHwvp1G;N>y+qfg6~i+%Xr)Lx0ddBTDP}>ol-pgq z>t(bXuU*FxWm?u^fKVFL+9ifqO0s&JV({yY?s`G@J$;d1N2)c(W{Qi%ZmEY*?_HtT zkE8{1*Y0|Wt_52wsig`%*RH)~hTNJpLkHE-n!9$_%emh8(Q^cqww0bwZ}$V@>d z_Wo;k?XKOmyMD28yVtH!W^FLb(40zyM4`o?17j&zrzUspu9xUqGlR_B#p>t8kkh1+ zhqh|U6=QVQ?s|!?C0L<8jB>U%Bj(s~Rvi>*NFnfXyY70Cu492QwPp>SvWN}{l}DY@ zBqrnTuHE$#U0bc(!ugua92#cr7GsXc)&vQX`hBYIdXcUx#au&~4F^NYnrE={*F%v4 zn1YXQbk~b?jiFd;v9{D;Hqa1kMSWNmP7@&dbs2a4e{lWXW?sI;)1S{S{>Nhrp2qH$ zFsQbea84QuLu1p~3t8=*Ib$YFg_Os@n9I3W|vGzH$9RMR%6z-)YZ8EdZ`{cG? zwBCNs963-!ZHRf4X`GVcl&r1JI(ud=-k0sRU$ox-b@#1akK?VDh9vtj6t!$5O@w|K zq(CBQzQOzb-1dTOU+3G*vkj;t6}E((isdtfQl~hi#TmVi#BDFg_I1AP^|ssoc5Gkg z+eiWp2vwn)CB|9WY%Nvp_TJ|faobC>egE#Tx%r%LXT>yPo)Ko^)FV`^krKyDWc9X> zr*YdaY5Q4oLUqfv?bk3_8V|wD8VkocDh87+e(lC>FUa=K`T&VqYcS6dI7Ei!+);~$ zDiHGUK0vp