From 616622d2bb90f4a5e43a9da2dc864ef03dab1f00 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 7 Dec 2023 09:59:52 +0100 Subject: [PATCH 01/57] first version of the workflow single step --- .../main/java/eu/dnetlib/dhp/api/Utils.java | 5 + .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 8 +- .../PrepareDatasourceCountryAssociation.java | 2 +- .../SparkCountryPropagationJob.java | 6 + ...kResultToCommunityFromOrganizationJob.java | 6 + .../PrepareResultCommunitySet.java | 2 +- .../SparkResultToCommunityFromProject.java | 6 + .../PrepareResultCommunitySetStep1.java | 18 +- ...parkResultToCommunityThroughSemRelJob.java | 6 + ...t_preparecommunitytoresult_parameters.json | 4 +- .../oozie_app/workflow.xml | 6 +- .../dhp/wf/main/oozie_app/config-default.xml | 30 ++ .../dnetlib/dhp/wf/main/oozie_app/import.txt | 10 + .../dhp/wf/main/oozie_app/workflow.xml | 324 +++++++++++++++ .../bulktag/oozie_app/config-default.xml | 54 +++ .../bulktag/oozie_app/workflow.xml | 66 ++++ .../oozie_app/config-default.xml | 58 +++ .../countrypropagation/oozie_app/workflow.xml | 316 +++++++++++++++ .../input_preparation_parameter.json | 50 +++ .../input_propagation_parameter.json | 62 +++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 93 +++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 369 ++++++++++++++++++ .../oozie_app/config-default.xml | 63 +++ .../projecttoresult/oozie_app/workflow.xml | 94 +++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 88 +++++ .../input_communitytoresult_parameters.json | 28 ++ ...t_preparecommunitytoresult_parameters.json | 28 ++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 90 +++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 305 +++++++++++++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 182 +++++++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 97 +++++ 38 files changed, 
2863 insertions(+), 19 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml create mode 100644 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml 
create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index d121b8b7e..bb30f55d6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -167,4 +167,9 @@ public class Utils implements Serializable { }); return projectMap; } + + public static List getCommunityIdList(String baseURL) throws IOException { + return getValidCommunities(baseURL).stream() + .map(community -> community.getId()).collect(Collectors.toList()); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 5d1b2b38d..5745515ba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -151,7 +151,13 @@ public class SparkBulkTagJob { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + e.name()); + .json(outputPath + e.name());//writing the tagging in the working dir for entity + + readPath(spark, outputPath + e.name(), resultClazz) //copy the tagging in the actual result output path + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath + e.name()); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index b9f3bff52..b1720d19d 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -66,7 +66,7 @@ public class PrepareDatasourceCountryAssociation { conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); + //removeOutputDir(spark, outputPath); prepareDatasourceCountryAssociation( spark, Arrays.asList(parser.get("whitelist").split(";")), diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index d9f6433a0..2b0dd7628 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -97,6 +97,12 @@ public class SparkCountryPropagationJob { .mode(SaveMode.Overwrite) .json(outputPath); + readPath(spark, outputPath, resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(sourcePath); + } private static MapFunction, R> getCountryMergeFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index df8ca3805..9152b1f5a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -92,6 +92,12 @@ public class 
SparkResultToCommunityFromOrganizationJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + e.name()); + + readPath(spark, outputPath + e.name(), resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index 7fed2606b..467e11a96 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -53,7 +53,7 @@ public class PrepareResultCommunitySet { log.info("outputPath: {}", outputPath); final String baseURL = parser.get("baseURL"); - log.info("baseUEL: {}", baseURL); + log.info("baseURL: {}", baseURL); final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 6e298cf94..547891584 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -102,6 +102,12 @@ public class SparkResultToCommunityFromProject implements Serializable { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + e.name()); + + readPath(spark, outputPath + e.name(), resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + 
.json(inputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 0c836a3ba..73c4e2d7c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -4,9 +4,11 @@ package eu.dnetlib.dhp.resulttocommunityfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import java.io.IOException; import java.util.Arrays; import java.util.List; +import eu.dnetlib.dhp.api.Utils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; @@ -26,11 +28,6 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class PrepareResultCommunitySetStep1 { private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySetStep1.class); - private static final String COMMUNITY_LIST_XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')" - + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']" - + " and $x//CONFIGURATION/context/param[./@name='status']/text() != 'hidden'" - + " return $x//CONFIGURATION/context/@id/string()"; - /** * associates to each result the set of community contexts they are associated to; associates to each target of a * relation with allowed semantics the set of community context it could possibly inherit from the source of the @@ -88,10 +85,10 @@ public class PrepareResultCommunitySetStep1 { final List allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";")); log.info("allowedSemRel: {}", new 
Gson().toJson(allowedsemrel)); - final String isLookupUrl = parser.get("isLookUpUrl"); - log.info("isLookupUrl: {}", isLookupUrl); + final String baseURL = parser.get("baseURL"); + log.info("baseURL: {}", baseURL); - final List communityIdList = getCommunityList(isLookupUrl); + final List communityIdList = getCommunityList(baseURL); log.info("communityIdList: {}", new Gson().toJson(communityIdList)); final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase(); @@ -159,9 +156,8 @@ public class PrepareResultCommunitySetStep1 { .json(outputResultPath); } - public static List getCommunityList(final String isLookupUrl) throws ISLookUpException { - ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - return isLookUp.quickSearchProfile(COMMUNITY_LIST_XQUERY); + public static List getCommunityList(final String baseURL) throws IOException { + return Utils.getCommunityIdList(baseURL); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index f31a26230..bb7ff1fb7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -100,6 +100,12 @@ public class SparkResultToCommunityThroughSemRelJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); + + readPath(spark, outputPath, resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath); } private static MapFunction, R> contextUpdaterFn() { diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json index 8c99da673..271db10bb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json @@ -1,7 +1,7 @@ [ { - "paramName":"is", - "paramLongName":"isLookUpUrl", + "paramName":"bu", + "paramLongName":"baseURL", "paramDescription": "URL of the isLookUp Service", "paramRequired": true }, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml index 81b51443c..916eb8b7c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -9,8 +9,8 @@ the semantic relationships allowed for propagation - isLookUpUrl - the isLookup service endpoint + baseURL + the base URL for the community APIs outputPath @@ -116,7 +116,7 @@ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${workingDir}/preparedInfo/targetCommunityAssoc --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} + --baseURL${baseURL} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt new file mode 100644 index 000000000..b20259414 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt @@ -0,0 +1,10 @@ +## This is a classpath-based import file (this header is required) +orcid_propagation classpath eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app +bulk_tagging classpath eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app +affiliation_inst_repo classpath eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app +entity_semantic_relation classpath eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app +community_organization classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app +result_project classpath eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app +community_project classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app +community_sem_rel classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app +country_propagation classpath eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml new 
file mode 100644 index 000000000..1e6736bf4 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -0,0 +1,324 @@ + + + + + sourcePath + the source path + + + allowedsemrelsorcidprop + the semantic relationships allowed for propagation + + + allowedsemrelsresultproject + the allowed semantics + + + allowedsemrelscommunitysemrel + the semantic relationships allowed for propagation + + + datasourceWhitelistForCountryPropagation + the white list + + + allowedtypes + the allowed types + + + outputPath + the output path + + + organizationtoresultcommunitymap + organization community map + + + pathMap + the json path associated to each selection field + + + blacklist + list of datasources in blacklist for the affiliation from instrepo propagation + + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('resumeFrom') eq 'BulkTagging'} + ${wf:conf('resumeFrom') eq 
'AffiliationInstitutionalRepository'} + ${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'} + ${wf:conf('resumeFrom') eq 'CommunityOrganization'} + ${wf:conf('resumeFrom') eq 'ResultProject'} + ${wf:conf('resumeFrom') eq 'CommunityProject'} + ${wf:conf('resumeFrom') eq 'CommunitySemanticRelation'} + ${wf:conf('resumeFrom') eq 'CountryPropagation'} + + + + + + + + ${wf:appPath()}/orcid_propagation + + + + + sourcePath + ${sourcePath} + + + allowedsemrels + ${allowedsemrelsorcidprop} + + + outputPath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/bulk_tagging + + + + + sourcePath + ${outputPath} + + + baseURL + ${baseURL} + + + pathMap + ${pathMap} + + + + + + + + + + ${wf:appPath()}/affiliation_inst_repo + + + + + sourcePath + ${outputPath} + + + blacklist + ${blacklist} + + + + + + + + + + ${wf:appPath()}/affiliation_semantic_relation + + + + + sourcePath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/community_organization + + + + + sourcePath + ${outputPath} + + + baseURL + ${baseURL} + + + + + + + + + + ${wf:appPath()}/result_project + + + + + sourcePath + ${outputPath} + + + allowedsemrels + ${allowedsemrelsresultproject} + + + + + + + + + + ${wf:appPath()}/community_project + + + + + sourcePath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/community_sem_rel + + + + + sourcePath + ${outputPath} + + + allowedsemrels + ${allowedsemrelscommunitysemrel} + + + baseURL + ${baseURL} + + + + + + + + + + ${wf:appPath()}/country_propagation + + + + + sourcePath + ${outputPath} + + + whitelist + ${datasourceWhitelistForCountryPropagation} + + + allowedtypes + ${allowedtypes} + + + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml new file mode 100644 index 000000000..fe82ae194 --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml @@ -0,0 +1,54 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml new file mode 100644 index 000000000..a735e2b0e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -0,0 +1,66 @@ + + + + sourcePath + the source path + + + pathMap + the json path associated to each selection field + + + baseURL + The URL to access the community APIs + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn-cluster + cluster + bulkTagging-publication + eu.dnetlib.dhp.bulktag.SparkBulkTagJob + dhp-enrichment-${projectVersion}.jar + + --num-executors=${sparkExecutorNumber} + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/ + --workingPath${workingDir}/bulktag/ + --pathMap${pathMap} + --baseURL${baseURL} + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml new file mode 100644 index 000000000..1fbaeb5d5 --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -0,0 +1,316 @@ + + + + sourcePath + the source path + + + whitelist + the white list + + + allowedtypes + the allowed types + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + yarn + cluster + PrepareDatasourceCountryAssociation + eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath} + --whitelist${whitelist} + --allowedtypes${allowedtypes} + --workingPath${workingDir}/country + + + + + + + + + + + + + + + yarn + cluster + prepareResultCountry-Publication + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/publication + 
--workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + prepareResultCountry-Dataset + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/dataset + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + prepareResultCountry-ORP + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/otherresearchproduct + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + yarn + cluster + prepareResultCountry-Software + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + 
dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/software + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + + + + + + + + + + yarn + cluster + countryPropagationForPublications + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/publication + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + + yarn + cluster + countryPropagationForDataset + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + 
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/dataset + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + + yarn + cluster + countryPropagationForORP + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/otherresearchproduct + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + + yarn + cluster + countryPropagationForSoftware + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf 
spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/software + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json new file mode 100644 index 000000000..b59937331 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json @@ -0,0 +1,50 @@ +[ + { + "paramName":"gp", + "paramLongName":"graphPath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "path where to store/find the leaves", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "rep", + "paramLongName": "relationPath", + "paramDescription": "the path where to store
the selected subset of relations", + "paramRequired": false + }, + { + "paramName": "pop", + "paramLongName": "projectOrganizationPath", + "paramDescription": "path where to store/find association from project and organization", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json new file mode 100644 index 000000000..66a7f5b2f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json @@ -0,0 +1,62 @@ +[ + { + "paramName":"rep", + "paramLongName":"relationPath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "path where to store/find the leaves", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "wd", + "paramLongName": "workingDir", + "paramDescription": "the path of the working directory", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", +
"paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "it", + "paramLongName": "iterations", + "paramDescription": "the number of iterations to be computed", + "paramRequired": false + }, + { + "paramName": "pop", + "paramLongName": "projectOrganizationPath", + "paramDescription": "path where to store/find association from project and organization", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml new file mode 100644 index
000000000..e3f3c1758 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,93 @@ + + + + sourcePath + the source path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --graphPath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --leavesPath${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath + --projectOrganizationPath${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath + --relationPath${workingDir}/entitiesSemanticRelation/preparedInfo/relation + + + + + + + + yarn + cluster + resultToOrganizationFromSemRel + eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840 + + --relationPath${workingDir}/entitiesSemanticRelation/preparedInfo/relation + --outputPath${sourcePath}/relation + --leavesPath${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath + --projectOrganizationPath${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath + --hive_metastore_uris${hive_metastore_uris} + --workingDir${workingDir}/entitiesSemanticRelation/working + --iterations${iterations} + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml new file mode 100644 index 000000000..8d2c34105 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 
+ + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml new file mode 100644 index 000000000..6d800d6e2 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,369 @@ + + + + sourcePath + the source path + + + allowedsemrels + the semantic relationships allowed for propagation + + + outputPath + the output path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/relation + ${nameNode}/${outputPath}/relation + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-Publications + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf 
spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-Dataset + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-ORP + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-Software + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase2 + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf 
spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/orcidprop + --outputPath${workingDir}/orcidprop/mergedOrcidAssoc + + + + + + + + + + + + + + + yarn + cluster + ORCIDPropagation-Publication + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${outputPath}/publication + + + + + + + + yarn + cluster + ORCIDPropagation-Dataset + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf 
spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${outputPath}/dataset + + + + + + + + yarn + cluster + ORCIDPropagation-ORP + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${outputPath}/otherresearchproduct + + + + + + + + yarn + cluster + ORCIDPropagation-Software + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf 
spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${outputPath}/software + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml new file mode 100644 index 000000000..caf3c6050 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml @@ -0,0 +1,63 @@ + + + jobTracker + yarnRM + + + + nameNode + + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml new file mode 100644 index 000000000..93a2f98be --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -0,0 +1,94 @@ + + + + sourcePath + the source path + + + allowedsemrels + the allowed semantics + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + PrepareProjectResultsAssociation + eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/relation + --allowedsemrels${allowedsemrels} + --hive_metastore_uris${hive_metastore_uris} + --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked + + + + + + + + yarn + cluster + ProjectToResultPropagation + eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --hive_metastore_uris${hive_metastore_uris} + 
--outputPath${sourcePath}/relation + --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml new file mode 100644 index 000000000..8aec530cc --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -0,0 
+1,88 @@ + + + + sourcePath + the source path + + + baseURL + the baseURL from where to reach the community APIs + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Prepare-Community-Result-Organization + eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/relation + --outputPath${workingDir}/communityorganization/preparedInfo/resultCommunityList + --hive_metastore_uris${hive_metastore_uris} + --baseURL${baseURL} + + + + + + + + yarn + cluster + community2resultfromorganization-Publication + eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList + 
--sourcePath${sourcePath}/ + --outputPath${workingDir}/resulttocommunityfromorganization/ + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json new file mode 100644 index 000000000..0db8085d1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json @@ -0,0 +1,28 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json new file mode 100644 index 000000000..cbc01c2d5 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json @@ -0,0 +1,28 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", +
"paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "bu", + "paramLongName": "baseURL", + "paramDescription": "the base URL from where to reach the community APIs", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml new file mode 100644 index 000000000..90ed2e0b6 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -0,0 +1,90 @@ + + + + sourcePath + the source path + + + baseURL + the base URL to use to select the right community APIs + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + Prepare-Community-Result-Organization + eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/relation + --outputPath${workingDir}/communitythroughproject/preparedInfo/resultCommunityList + --baseURL${baseURL} + + + + + + + + yarn + cluster + community2resultfromproject + eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitythroughproject/preparedInfo/resultCommunityList + --sourcePath${sourcePath}/ + --outputPath${workingDir}/communitythroughproject/ + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml new file mode 100644 index 000000000..be88c45bd --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,305 @@ + + + + sourcePath + the source path + + + allowedsemrels + the semantic relationships allowed for propagation + + + baseURL + the isLookup service endpoint + + + outputPath + the output path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-Publications + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-Dataset + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-ORP + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-Software + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + 
--conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + + + yarn + cluster + ResultToCommunityEmRelPropagation-PreparePhase2 + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --outputPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + + + + + + + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-Publication + eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + 
--preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/publication + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/communitysemrel/publication + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-Dataset + eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/dataset + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/communitysemrel/dataset + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-ORP + eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + 
--preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/otherresearchproduct + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/communitysemrel/otherresearchproduct + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-Software + eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/software + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/communitysemrel/software + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + 
oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml new file mode 100644 index 000000000..8281130f3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -0,0 +1,182 @@ + + + + sourcePath + the source path + + + blacklist + The list of institutional repositories that should not be used for the propagation + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingDir}/affiliationInstRepo + --blacklist${blacklist} + + + + + + + + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForPublications + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/publication + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForDataset + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + 
--sourcePath${sourcePath}/dataset + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForORP + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/otherresearchproduct + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForSoftware + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/software + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml new file mode 100644 index 000000000..2744ea92b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml new file mode 100644 index 000000000..7918df120 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,97 @@ + + + + sourcePath + the source path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --graphPath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath + --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation + + + + + + + + yarn + cluster + resultToOrganizationFromSemRel + eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + 
--driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840 + + --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation + --outputPath${sourcePath} + --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath + --hive_metastore_uris${hive_metastore_uris} + --workingDir${workingDir}/affiliationSemanticRelation/working + --iterations${iterations} + + + + + + + + + + + + + + + + + \ No newline at end of file From d4eedada71436a7cae1a5ab154598503b8f36e91 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 9 Dec 2023 15:20:11 +0100 Subject: [PATCH 02/57] adjusting workflow definition --- .../main/java/eu/dnetlib/dhp/api/Utils.java | 6 ++-- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 17 +++++----- .../PrepareDatasourceCountryAssociation.java | 2 +- .../SparkCountryPropagationJob.java | 8 ++--- ...kResultToCommunityFromOrganizationJob.java | 8 ++--- .../SparkResultToCommunityFromProject.java | 8 ++--- .../PrepareResultCommunitySetStep1.java | 2 +- ...parkResultToCommunityThroughSemRelJob.java | 8 ++--- .../eu/dnetlib/dhp/wf/main/job.properties | 15 +++++++++ .../bulktag/oozie_app/workflow.xml | 2 +- .../oozie_app/workflow.xml | 31 +++++++++++++------ 11 files changed, 66 insertions(+), 41 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index bb30f55d6..06d0f95c2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -169,7 +169,9 @@ public class Utils implements Serializable { } public static List getCommunityIdList(String baseURL) throws IOException { - return getValidCommunities(baseURL).stream() - .map(community -> community.getId()).collect(Collectors.toList()); + return getValidCommunities(baseURL) + .stream() + .map(community -> community.getId()) + .collect(Collectors.toList()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 5745515ba..51307ccd1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -105,7 +105,6 @@ public class SparkBulkTagJob { Map>> dsm = cc.getEoscDatasourceMap(); for (String ds : datasources.collectAsList()) { - // final String dsId = ds.substring(3); if (!dsm.containsKey(ds)) { ArrayList> eoscList = new ArrayList<>(); dsm.put(ds, eoscList); @@ -116,13 +115,11 @@ public class SparkBulkTagJob { private static boolean isOKDatasource(Datasource ds) { final String compatibility = ds.getOpenairecompatibility().getClassid(); - boolean isOk = (compatibility.equalsIgnoreCase(OPENAIRE_3) || + return (compatibility.equalsIgnoreCase(OPENAIRE_3) || compatibility.equalsIgnoreCase(OPENAIRE_4) || compatibility.equalsIgnoreCase(OPENAIRE_CRIS) || compatibility.equalsIgnoreCase(OPENAIRE_DATA)) && ds.getCollectedfrom().stream().anyMatch(cf -> cf.getKey().equals(EOSC)); - - return isOk; } private static void execBulkTag( @@ -151,13 +148,13 @@ 
public class SparkBulkTagJob { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + e.name());//writing the tagging in the working dir for entity + .json(outputPath + e.name());// writing the tagging in the working dir for entity - readPath(spark, outputPath + e.name(), resultClazz) //copy the tagging in the actual result output path - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath + e.name()); + readPath(spark, outputPath + e.name(), resultClazz) // copy the tagging in the actual result output path + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index b1720d19d..2ffe6f36d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -66,7 +66,7 @@ public class PrepareDatasourceCountryAssociation { conf, isSparkSessionManaged, spark -> { - //removeOutputDir(spark, outputPath); + // removeOutputDir(spark, outputPath); prepareDatasourceCountryAssociation( spark, Arrays.asList(parser.get("whitelist").split(";")), diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 2b0dd7628..17247f812 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -98,10 +98,10 @@ public class SparkCountryPropagationJob { .json(outputPath); readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(sourcePath); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(sourcePath); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 9152b1f5a..adb7feef7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -94,10 +94,10 @@ public class SparkResultToCommunityFromOrganizationJob { .json(outputPath + e.name()); readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath + e.name()); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 547891584..229ac7e32 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -104,10 +104,10 @@ 
public class SparkResultToCommunityFromProject implements Serializable { .json(outputPath + e.name()); readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath + e.name()); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 73c4e2d7c..40c074a6e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -8,7 +8,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; -import eu.dnetlib.dhp.api.Utils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; @@ -17,6 +16,7 @@ import org.slf4j.LoggerFactory; import com.google.gson.Gson; +import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.oaf.Relation; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index bb7ff1fb7..a10737849 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -102,10 +102,10 @@ public class SparkResultToCommunityThroughSemRelJob { .json(outputPath); readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath); } private static MapFunction, R> contextUpdaterFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties new file mode 100644 index 000000000..6b9b5063f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -0,0 +1,15 @@ +sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched +resumeFrom=OrcidPropagation +allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo +allowedsemrelsresultproject=isSupplementedBy;isSupplementTo +allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo +datasourceWhitelistForCountryPropagation=10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14 +allowedtypes=pubsrepository::institutional +outputPath=/tmp/miriam/enrichment_one_step +organizationtoresultcommunitymap={"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|ukri________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|ukri________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|ukri________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|ukri________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], 
"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|ukri________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|ukri________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"], "20|openorgs____::d11f981828c485cd23d93f7f24f24db1":["eut"], "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a":["eut"], "20|openorgs____::526468206bca24c1c90da6a312295cf4":["eut"], "20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7":["eut"], "20|openorgs____::55a1f889758964b77682904218fdb298":["eut"], 
"20|openorgs____::530092b6970d60a5329beb9f39e8d7d4":["eut"], "20|openorgs____::aadafa39392b3e200102596a3a4aad9d":["eut"], "20|openorgs____::c3fe999c74fad308132b8a5971367dce":["eut"], "20|openorgs____::1624ff7c01bb641b91f4518539a0c28a":["aurora"], "20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2":["aurora"], "20|openorgs____::818b75030e0e40612d69e049843ede7e":["aurora"], "20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92":["aurora"], "20|openorgs____::ed47496b44722f0e9d7b98898189be0d":["aurora"], "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953":["aurora"], "20|openorgs____::eb391317ed0dc684aa81ac16265de041":["aurora"], "20|openorgs____::f7cfcc98245e22c7d6e321cde930e746":["aurora"], "20|openorgs____::f33179d3306ba2599f7a898b056b604f":["aurora"], "20|pending_org_::75c41e6dd18466709ef359323d96fa05":["aurora"]} +pathMap={"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid":"orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} +blacklist=empty +allowedpids=orcid;orcid_pending +baseURL = https://services.openaire.eu/openaire/community/ + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index a735e2b0e..307997d4c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -51,7 +51,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/ - --workingPath${workingDir}/bulktag/ + --outputPath${workingDir}/bulktag/ --pathMap${pathMap} --baseURL${baseURL} diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 6d800d6e2..8e945ee5a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -80,7 +80,14 @@ - + + + + + + + + @@ -258,6 +265,7 @@ --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication + --hive_metastore_uris${hive_metastore_uris} @@ -288,6 +296,7 @@ --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset + --hive_metastore_uris${hive_metastore_uris} @@ -318,6 +327,7 @@ --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct + --hive_metastore_uris${hive_metastore_uris} @@ -348,21 +358,22 @@ --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software + --hive_metastore_uris${hive_metastore_uris} - + - - - - - - - - + + + + + + + + From 8752d275fae9bc7764cd2ee049b6321d44b70528 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 9 Dec 2023 15:24:45 +0100 Subject: [PATCH 03/57] removed not needed parameter --- .../SparkOrcidToResultFromSemRelJob.java | 7 ++++--- .../input_orcidtoresult_parameters.json | 6 ------ .../orcidtoresultfromsemrel/oozie_app/workflow.xml | 4 ---- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index a38b4da2e..998f4719a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -2,7 +2,8 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.List; import java.util.Optional; @@ -65,9 +66,9 @@ public class SparkOrcidToResultFromSemRelJob { Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + + runWithSparkSession( conf, isSparkSessionManaged, spark -> { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json index d8aa7eb9a..3cbaa23bb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -11,12 +11,6 @@ "paramDescription": "true if the new version of the graph must be saved", "paramRequired": false }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, { "paramName": "out", "paramLongName": "outputPath", diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 8e945ee5a..483a805b1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -265,7 +265,6 @@ --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication - --hive_metastore_uris${hive_metastore_uris} @@ -296,7 +295,6 @@ --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset - --hive_metastore_uris${hive_metastore_uris} @@ -327,7 +325,6 @@ --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} @@ -358,7 +355,6 @@ --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software - --hive_metastore_uris${hive_metastore_uris} From 0d8e496a6317943a28282ffdd0ee5a4d735f61f7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 15 Dec 2023 12:16:43 +0100 Subject: [PATCH 04/57] - --- .../SparkOrcidToResultFromSemRelJob.java | 2 - .../AppendNewRelations.java | 75 +++++++++++++++++++ .../PrepareResultInstRepoAssociation.java | 7 +- ...arkResultToOrganizationFromIstRepoJob.java | 2 +- .../input_prepareresultorg_parameters.json | 13 +--- .../eu/dnetlib/dhp/wf/main/job.properties | 17 ++++- .../oozie_app/workflow.xml | 36 +++++++-- 7 files changed, 131 insertions(+), 21 deletions(-) create mode 100644 
dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 998f4719a..5f9260e5d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; - import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.List; @@ -67,7 +66,6 @@ public class SparkOrcidToResultFromSemRelJob { SparkConf conf = new SparkConf(); - runWithSparkSession( conf, isSparkSessionManaged, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java new file mode 100644 index 000000000..a5884873b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java @@ -0,0 +1,75 @@ + +package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import java.io.Serializable; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import 
org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bulktag.community.ResultTagger; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; + +/** + * @author miriam.baglioni + * @Date 09/12/23 + */ +public class AppendNewRelations implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(AppendNewRelations.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + AppendNewRelations.class + .getResourceAsStream( + "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> appendNewRelation(spark, inputPath, outputPath)); + } + + private static void appendNewRelation(SparkSession spark, String inputPath, String outputPath) { + + readPath(spark, inputPath + "publication/relation", Relation.class) + .union(readPath(spark, inputPath + "dataset/relation", Relation.class)) + .union(readPath(spark, inputPath + "otherresearchproduct/relation", Relation.class)) + .union(readPath(spark, inputPath + "software/relation", Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + } + +} diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 1663afb32..deec6fedc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -52,10 +52,13 @@ public class PrepareResultInstRepoAssociation { String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); - final String datasourceOrganizationPath = parser.get("datasourceOrganizationPath"); + final String workingPath = parser.get("workingPath"); + log.info("workingPath : {}", workingPath); + + final String datasourceOrganizationPath = workingPath + "/preparedInfo/datasourceOrganization"; log.info("datasourceOrganizationPath {}: ", datasourceOrganizationPath); - final String alreadyLinkedPath = parser.get("alreadyLinkedPath"); + final String alreadyLinkedPath = workingPath + "/preparedInfo/alreadyLinked"; log.info("alreadyLinkedPath {}: ", alreadyLinkedPath); List blacklist = Optional diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0757ebccd..bbad20e2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -119,7 +119,7 @@ public class SparkResultToOrganizationFromIstRepoJob { "left_outer") 
.flatMap(createRelationFn(), Encoders.bean(Relation.class)) .write() - .mode(SaveMode.Append) + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json index 2f00bacae..3f4b1d151 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json @@ -11,16 +11,11 @@ "paramDescription": "the hive metastore uris", "paramRequired": true }, + { - "paramName":"dop", - "paramLongName":"datasourceOrganizationPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"alp", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "path where to store/find already linked results and organizations", + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the working path", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 6b9b5063f..243c1e99d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=OrcidPropagation +resumeFrom=AffiliationInstitutionalRepository allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo 
allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo @@ -7,7 +7,20 @@ datasourceWhitelistForCountryPropagation=10|openaire____::3795d6478e30e2c9f787d4 allowedtypes=pubsrepository::institutional outputPath=/tmp/miriam/enrichment_one_step organizationtoresultcommunitymap={"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|ukri________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|ukri________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|ukri________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|ukri________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":
["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|ukri________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|ukri________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], 
"20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"], "20|openorgs____::d11f981828c485cd23d93f7f24f24db1":["eut"], "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a":["eut"], "20|openorgs____::526468206bca24c1c90da6a312295cf4":["eut"], "20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7":["eut"], "20|openorgs____::55a1f889758964b77682904218fdb298":["eut"], "20|openorgs____::530092b6970d60a5329beb9f39e8d7d4":["eut"], "20|openorgs____::aadafa39392b3e200102596a3a4aad9d":["eut"], "20|openorgs____::c3fe999c74fad308132b8a5971367dce":["eut"], "20|openorgs____::1624ff7c01bb641b91f4518539a0c28a":["aurora"], "20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2":["aurora"], "20|openorgs____::818b75030e0e40612d69e049843ede7e":["aurora"], "20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92":["aurora"], "20|openorgs____::ed47496b44722f0e9d7b98898189be0d":["aurora"], "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953":["aurora"], "20|openorgs____::eb391317ed0dc684aa81ac16265de041":["aurora"], "20|openorgs____::f7cfcc98245e22c7d6e321cde930e746":["aurora"], "20|openorgs____::f33179d3306ba2599f7a898b056b604f":["aurora"], "20|pending_org_::75c41e6dd18466709ef359323d96fa05":["aurora"]} -pathMap={"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid":"orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} +pathMap ={"author":"$['author'][*]['fullname']", \ + "title":"$['title'][*]['value']",\ + "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ + "orcid_pending":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']" ,\ + "contributor" : "$['contributor'][*]['value']",\ + "description" : "$['description'][*]['value']",\ + "subject" 
:"$['subject'][*]['value']" , \ + "fos" : "$['subject'][?(@['qualifier']['classid']=='FOS')].value" ,\ + "sdg" : "$['subject'][?(@['qualifier']['classid']=='SDG')].value",\ + "journal":"$['journal'].name",\ + "hostedby":"$['instance'][*]['hostedby']['key']",\ + "collectedfrom":"$['instance'][*]['collectedfrom']['key']",\ + "publisher":"$['publisher'].value",\ + "publicationyear":"$['dateofacceptance'].value"} blacklist=empty allowedpids=orcid;orcid_pending baseURL = https://services.openaire.eu/openaire/community/ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index 8281130f3..dadea2d28 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -47,6 +47,7 @@ --sourcePath${sourcePath} --workingPath${workingDir}/affiliationInstRepo --blacklist${blacklist} + --hive_metastore_uris${hive_metastore_uris} @@ -78,7 +79,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/publication - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/publication/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -107,7 +108,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/dataset - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/dataset/relation 
--datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -136,7 +137,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/otherresearchproduct - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/otherresearchproduct/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -165,7 +166,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/software - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/software/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -175,7 +176,32 @@ - + + + + + yarn + cluster + append new relations + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.AppendNewRelations + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --outputPath${sourcePath}/relation + --sourcePath${workingDir}/affiliationinstrepo/ + + + + From 01ce0b9c7626cb853acd218db011a498cb8d06e2 Mon Sep 17 00:00:00 2001 From: 
"miriam.baglioni" Date: Fri, 15 Dec 2023 12:24:55 +0100 Subject: [PATCH 05/57] [doiboost - preprocess] remove transition to orcid preparation from sequence of steps at the beginning of the workflow --- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index ed6853229..309031ee4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -34,18 +34,6 @@ the MAG working path - - - - inputPathOrcid - the ORCID input path - - - - workingPathOrcid - the ORCID working path - - @@ -65,7 +53,6 @@ ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} - ${wf:conf('resumeFrom') eq 'PreProcessORCID'} From 3eca5d2e1c302a7427ffa735c95ac96a6419caec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 Dec 2023 09:55:27 +0100 Subject: [PATCH 06/57] - --- ...SemRel.java => SparkEntityToOrganizationFromSemRel.java} | 4 ++-- .../dhp/entitytoorganizationfromsemrel/StepActions.java | 5 ++--- .../entitytoorganizationfromsemrel/oozie_app/workflow.xml | 2 +- .../main/resources/eu/dnetlib/dhp/wf/main/job.properties | 2 +- .../resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml | 2 +- .../entitytoorganizationfromsemrel/oozie_app/workflow.xml | 6 +++--- .../dhp/entitytoorganizationfromsemrel/SparkJobTest.java | 6 +++--- 7 files changed, 13 insertions(+), 14 deletions(-) rename dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/{SparkResultToOrganizationFromSemRel.java => SparkEntityToOrganizationFromSemRel.java} (98%) diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java similarity index 98% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java index 27e502aba..87c0ec2b9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java @@ -27,8 +27,8 @@ import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganization import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; -public class SparkResultToOrganizationFromSemRel implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); +public class SparkEntityToOrganizationFromSemRel implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkEntityToOrganizationFromSemRel.class); private static final int MAX_ITERATION = 5; public static final String NEW_RESULT_RELATION_PATH = "/newResultRelation"; public static final String NEW_PROJECT_RELATION_PATH = "/newProjectRelation"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java index 386ea1a5c..36a7523c5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java 
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java @@ -3,8 +3,8 @@ package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.readPath; -import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; -import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; import java.io.Serializable; import java.util.*; @@ -20,7 +20,6 @@ import org.jetbrains.annotations.NotNull; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml index 16c8c4e19..851aabe8b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -162,7 +162,7 @@ yarn cluster resultToOrganizationFromSemRel - eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 243c1e99d..6085cd2b2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=AffiliationInstitutionalRepository +resumeFrom=default allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 1e6736bf4..33f849645 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -120,7 +120,7 @@ ${wf:conf('resumeFrom') eq 'BulkTagging'} ${wf:conf('resumeFrom') eq 'AffiliationInstitutionalRepository'} - ${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'} + ${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'} ${wf:conf('resumeFrom') eq 'CommunityOrganization'} ${wf:conf('resumeFrom') eq 'ResultProject'} ${wf:conf('resumeFrom') eq 'CommunityProject'} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml index e3f3c1758..dbb22b994 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml 
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -29,7 +29,7 @@ yarn cluster - PrepareResultOrganizationAssociation + PrepareResultProjectOrganizationAssociation eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo dhp-enrichment-${projectVersion}.jar @@ -57,8 +57,8 @@ yarn cluster - resultToOrganizationFromSemRel - eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + entityToOrganizationFromSemRel + eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java index 517a20cd9..db917658a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java @@ -114,7 +114,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { @@ -395,7 +395,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { @@ -678,7 +678,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { From 9d342a47da489d71c3a739b06943a4f09a1225ee Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 18 Dec 2023 11:48:57 +0100 Subject: 
[PATCH 07/57] updated the transformation Baseline workflow to include mdstore rollback/commit action --- .../dhp/sx/bio/pubmed/oozie_app/workflow.xml | 69 +++++++++++++++++-- .../ebi/SparkCreateBaselineDataFrame.scala | 18 +++-- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 8915a090b..30eb41469 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + baselineWorkingPath @@ -9,8 +9,12 @@ The IS lookUp service endopoint - targetPath - The target path + mdStoreOutputId + the identifier of the cleaned MDStore + + + mdStoreManagerURI + the path of the cleaned mdstore skipUpdate @@ -19,12 +23,31 @@ - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreOutputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + yarn @@ -43,16 +66,52 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --workingPath${baselineWorkingPath} - --targetPath${targetPath} + --mdstoreOutputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} --masteryarn --isLookupUrl${isLookupUrl} --hdfsServerUri${nameNode} --skipUpdate${skipUpdate} + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + 
eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 8ac8b00bf..639918151 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -2,9 +2,12 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} import eu.dnetlib.dhp.sx.bio.pubmed._ +import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration @@ -164,11 +167,15 @@ object SparkCreateBaselineDataFrame { val workingPath = parser.get("workingPath") log.info("workingPath: {}", workingPath) - val targetPath = parser.get("targetPath") - log.info("targetPath: {}", targetPath) + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion) + + val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + log.info("outputBasePath: {}", outputBasePath) val hdfsServerUri = parser.get("hdfsServerUri") - log.info("hdfsServerUri: {}", targetPath) + 
log.info("hdfsServerUri: {}", hdfsServerUri) val skipUpdate = parser.get("skipUpdate") log.info("skipUpdate: {}", skipUpdate) @@ -216,8 +223,11 @@ object SparkCreateBaselineDataFrame { .map(a => PubMedToOaf.convert(a, vocabularies)) .as[Oaf] .filter(p => p != null), - targetPath + s"$outputBasePath/$MDSTORE_DATA_PATH" ) + val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH") + val mdStoreSize = df.count + writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH") } } From 15fd93a2b66f6829cfac0b1350266664371d1df5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 18 Dec 2023 12:21:55 +0100 Subject: [PATCH 08/57] uploaded input parameters on CreateBaseline WF --- .../eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json index 8dc8a2aae..3ba83764d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json @@ -2,7 +2,7 @@ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true}, + {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true}, {"paramName":"s", "paramLongName":"skipUpdate", "paramDescription": 
"skip update ", "paramRequired": false}, {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file From d410ea8a4176341cdebaa76179c77b5fdd45c631 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 19 Dec 2023 12:15:01 +0100 Subject: [PATCH 09/57] added needed parameter --- .../AppendNewRelations.java | 11 +- .../oozie_app/workflow.xml | 112 ++---------------- .../input_newrelation_parameters.json | 20 ++++ .../eu/dnetlib/dhp/wf/main/job.properties | 4 +- .../dhp/wf/main/oozie_app/workflow.xml | 10 +- 5 files changed, 41 insertions(+), 116 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java index a5884873b..636c14b65 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java @@ -2,26 +2,19 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.Objects; -import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bulktag.community.ResultTagger; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; /** * @author miriam.baglioni @@ -54,7 +47,7 @@ public class AppendNewRelations implements Serializable { SparkConf conf = new SparkConf(); - runWithSparkHiveSession( + runWithSparkSession( conf, isSparkSessionManaged, spark -> appendNewRelation(spark, inputPath, outputPath)); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml index 851aabe8b..d7335d840 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -5,9 +5,10 @@ the source path - outputPath - sets the outputPath + iterations + the number of hops to be done up on the hierarchy + @@ -21,119 +22,26 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - ${wf:conf('resumeFrom') eq 'PrepareInfo'} - - - - - - + + - + - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - 
${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - yarn cluster - PrepareResultOrganizationAssociation + PrepareResultProjectOrganizationAssociation eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo dhp-enrichment-${projectVersion}.jar @@ -161,7 +69,7 @@ yarn cluster - resultToOrganizationFromSemRel + resultProjectToOrganizationFromSemRel eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar @@ -177,7 +85,7 @@ --conf spark.sql.shuffle.partitions=3840 --relationPath${workingDir}/preparedInfo/relation - --outputPath${outputPath}/relation + --outputPath${sourcePath}/relation --leavesPath${workingDir}/preparedInfo/leavesPath --childParentPath${workingDir}/preparedInfo/childParentPath --resultOrgPath${workingDir}/preparedInfo/resultOrgPath diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json new file mode 100644 index 000000000..5fe92cff1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + },{ + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "institutional repositories that should not be considered for the propagation", + "paramRequired": false +} +] \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 6085cd2b2..93e9e0ab1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=default +resumeFrom=AffiliationSemanticRelation allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo @@ -24,5 +24,5 @@ pathMap ={"author":"$['author'][*]['fullname']", \ blacklist=empty allowedpids=orcid;orcid_pending baseURL = https://services.openaire.eu/openaire/community/ - +iterations=1 diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 33f849645..de054b962 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -195,13 +195,13 @@ - + - + - ${wf:appPath()}/affiliation_semantic_relation + ${wf:appPath()}/entity_semantic_relation @@ -209,6 +209,10 @@ sourcePath ${outputPath} + + iterations + ${iterations} + From 4740c808f735193e8975f09e1a6841eb4d9a676f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Dec 2023 14:26:54 +0100 Subject: [PATCH 10/57] - --- .../PrepareDatasourceCountryAssociation.java | 2 +- .../bulktag/datasourcemaster_parameters.json | 32 -- .../dhp/bulktag/input_bulkTag_parameters.json | 38 -- .../dhp/bulktag/input_eoscTag_parameters.json | 21 - .../input_eosc_bulkTag_parameters.json | 41 -- 
.../dhp/bulktag/oozie_app/config-default.xml | 54 --- .../dhp/bulktag/oozie_app/workflow.xml | 120 ------ .../input_countrypropagation_parameters.json | 32 -- .../input_prepareassoc_parameters.json | 32 -- ...input_prepareresultcountry_parameters.json | 38 -- .../oozie_app/config-default.xml | 58 --- .../countrypropagation/oozie_app/workflow.xml | 375 ------------------ .../input_preparation_parameter.json | 50 --- .../input_propagation_parameter.json | 62 --- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 105 ----- .../input_orcidtoresult_parameters.json | 44 -- ...input_prepareorcidtoresult_parameters.json | 38 -- ...nput_prepareorcidtoresult_parameters2.json | 20 - .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 371 ----------------- ...put_prepareprojecttoresult_parameters.json | 33 -- .../input_projecttoresult_parameters.json | 44 -- .../oozie_app/config-default.xml | 63 --- .../projecttoresult/oozie_app/workflow.xml | 184 --------- .../input_communitytoresult_parameters.json | 28 -- ...t_preparecommunitytoresult_parameters.json | 33 -- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 147 ------- .../input_communitytoresult_parameters.json | 28 -- ...t_preparecommunitytoresult_parameters.json | 28 -- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 144 ------- .../input_communitytoresult_parameters.json | 52 --- ..._preparecommunitytoresult2_parameters.json | 20 - ...t_preparecommunitytoresult_parameters.json | 44 -- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 366 ----------------- .../input_newrelation_parameters.json | 20 - .../input_prepareresultorg_parameters.json | 32 -- ...sulaffiliationfrominstrepo_parameters.json | 56 --- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 277 ------------- .../eu/dnetlib/dhp/wf/main/job.properties | 8 +- .../dhp/wf/main/oozie_app/workflow.xml | 6 +- .../bulktag/oozie_app/workflow.xml 
| 10 +- .../countrypropagation/oozie_app/workflow.xml | 10 +- .../oozie_app/workflow.xml | 10 +- .../projecttoresult/oozie_app/workflow.xml | 12 +- .../oozie_app/workflow.xml | 15 +- .../oozie_app/workflow.xml | 11 +- .../oozie_app/workflow.xml | 68 +++- .../oozie_app/workflow.xml | 21 +- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 97 ----- .../graph/hostedbymap/oozie_app/download.sh | 2 +- 56 files changed, 127 insertions(+), 3681 deletions(-) delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml delete mode 100644 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json delete mode 100644 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json delete mode 100644 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 2ffe6f36d..430c26592 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -90,7 +90,7 @@ public class PrepareDatasourceCountryAssociation { (FilterFunction) ds -> !ds.getDataInfo().getDeletedbyinference() && Optional.ofNullable(ds.getDatasourcetype()).isPresent() && Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() && - (allowedtypes.contains(ds.getDatasourcetype().getClassid()) || + (allowedtypes.contains(ds.getJurisdiction().getClassid()) || whitelist.contains(ds.getId()))); // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json deleted file mode 100644 index 9a2eadaa7..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName": "p", - "paramLongName": "hdfsPath", - "paramDescription": "the path where storing the sequential file", - "paramRequired": true - }, - { - "paramName": "nn", - "paramLongName": "hdfsNameNode", - "paramDescription": "the name node on hdfs", - "paramRequired": true - }, - { - "paramName": "pgurl", - "paramLongName": "postgresUrl", - "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb", - "paramRequired": true - }, - { - "paramName": "pguser", - "paramLongName": "postgresUser", - "paramDescription": "postgres user", - "paramRequired": false - }, - { - "paramName": "pgpasswd", - "paramLongName": "postgresPassword", - "paramDescription": "postgres password", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json deleted file mode 100644 index ce1a8ecab..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "pm", - "paramLongName":"pathMap", - "paramDescription": "the json path associated to each selection field", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to 
store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "tg", - "paramLongName": "taggingConf", - "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", - "paramRequired": false - }, - { - "paramName": "bu", - "paramLongName": "baseURL", - "paramDescription": "this parameter is to specify the api to be queried (beta or production)", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json deleted file mode 100644 index 4c25fea01..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "wp", - "paramLongName": "workingPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json deleted file mode 100644 index 5aace346d..000000000 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json +++ /dev/null @@ -1,41 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "dmp", - "paramLongName":"datasourceMapPath", - "paramDescription": "the path where the association datasource master has been stored", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "wp", - "paramLongName": "workingPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - - "paramName": "rt", - "paramLongName": "resultType", - "paramDescription": "the result type", - "paramRequired": true - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml deleted file mode 100644 index fe82ae194..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - 
com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml deleted file mode 100644 index 0d4d1f046..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - sourcePath - the source path - - - pathMap - the json path associated to each selection field - - - outputPath - the output path - - - baseURL - the community API base URL - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn-cluster - cluster - bulkTagging-result - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - 
--sourcePath${sourcePath}/ - --outputPath${outputPath}/ - --pathMap${pathMap} - --baseURL${baseURL} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json deleted file mode 100644 index f217e2458..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json deleted file mode 100644 index a00105f2b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - 
"paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "w", - "paramLongName": "whitelist", - "paramDescription": "the datasource having a type different from the allowed ones but that we want to add anyway", - "paramRequired": true - }, - { - "paramName": "at", - "paramLongName": "allowedtypes", - "paramDescription": "the allowed datasource types for country propagation", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json deleted file mode 100644 index 18163d1f9..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"out", - "paramLongName":"outputPath", - "paramDescription": "the output path", - "paramRequired": true - }, - { - "paramName":"w", - "paramLongName":"workingPath", - "paramDescription": "the working path", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared 
info have been stored", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml deleted file mode 100644 index 271ccbf72..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ /dev/null @@ -1,375 +0,0 @@ - - - - sourcePath - the source path - - - whitelist - the white list - - - allowedtypes - the allowed types - - - outputPath - the output path - - - - - - 
${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - PrepareDatasourceCountryAssociation - eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath} - --whitelist${whitelist} - --allowedtypes${allowedtypes} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - prepareResultCountry-Publication - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf 
spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/publication - --outputPath${workingDir}/publication - --workingPath${workingDir}/workingP - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - prepareResultCountry-Dataset - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/dataset - --outputPath${workingDir}/dataset - --workingPath${workingDir}/workingD - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - prepareResultCountry-ORP - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf 
spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/otherresearchproduct - --outputPath${workingDir}/otherresearchproduct - --workingPath${workingDir}/workingO - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - prepareResultCountry-Software - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/software - --outputPath${workingDir}/software - --workingPath${workingDir}/workingS - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - - - yarn - cluster - countryPropagationForPublications - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf 
spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/publication - --preparedInfoPath${workingDir}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - - - - - - - - yarn - cluster - countryPropagationForDataset - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/dataset - --preparedInfoPath${workingDir}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - - - - - - - - yarn - cluster - countryPropagationForORP - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/otherresearchproduct - --preparedInfoPath${workingDir}/otherresearchproduct - 
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - - - - - - - - yarn - cluster - countryPropagationForSoftware - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/software - --preparedInfoPath${workingDir}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json deleted file mode 100644 index b59937331..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json +++ /dev/null @@ -1,50 +0,0 @@ -[ - { - "paramName":"gp", - "paramLongName":"graphPath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"lp", - "paramLongName":"leavesPath", - "paramDescription": "true if the new version of the graph must be saved", - 
"paramRequired": false - }, - { - "paramName":"cp", - "paramLongName":"childParentPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"rp", - "paramLongName":"resultOrgPath", - "paramDescription": "path where to store/find already linked results and organizations", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "rep", - "paramLongName": "relationPath", - "paramDescription": "the path where to store the selected subset of relations", - "paramRequired": false - }, - { - "paramName": "pop", - "paramLongName": "projectOrganizationPath", - "paramDescription": "the number of iterations to be computed", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json deleted file mode 100644 index 66a7f5b2f..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json +++ /dev/null @@ -1,62 +0,0 @@ -[ - { - "paramName":"rep", - "paramLongName":"relationPath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"lp", - "paramLongName":"leavesPath", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"cp", - "paramLongName":"childParentPath", - "paramDescription": "path where to store/find association 
from datasource and organization", - "paramRequired": true - }, - { - "paramName":"rp", - "paramLongName":"resultOrgPath", - "paramDescription": "path where to store/find already linked results and organizations", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "wd", - "paramLongName": "workingDir", - "paramDescription": "true if it is a test running", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "it", - "paramLongName": "iterations", - "paramDescription": "the number of iterations to be computed", - "paramRequired": false - }, - { - "paramName": "pop", - "paramLongName": "projectOrganizationPath", - "paramDescription": "the number of iterations to be computed", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - 
spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index d7335d840..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,105 +0,0 @@ - - - - sourcePath - the source path - - - iterations - the number of hops to be done up on the hierarchy - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - yarn - cluster - PrepareResultProjectOrganizationAssociation - eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --graphPath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --leavesPath${workingDir}/preparedInfo/leavesPath - --childParentPath${workingDir}/preparedInfo/childParentPath - --resultOrgPath${workingDir}/preparedInfo/resultOrgPath - --projectOrganizationPath${workingDir}/preparedInfo/projectOrganizationPath - --relationPath${workingDir}/preparedInfo/relation - - - - - - - 
- yarn - cluster - resultProjectToOrganizationFromSemRel - eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=3840 - - --relationPath${workingDir}/preparedInfo/relation - --outputPath${sourcePath}/relation - --leavesPath${workingDir}/preparedInfo/leavesPath - --childParentPath${workingDir}/preparedInfo/childParentPath - --resultOrgPath${workingDir}/preparedInfo/resultOrgPath - --projectOrganizationPath${workingDir}/preparedInfo/projectOrganizationPath - --hive_metastore_uris${hive_metastore_uris} - --workingDir${workingDir}/working - --iterations${iterations} - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json deleted file mode 100644 index 3cbaa23bb..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - 
"paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"pu", - "paramLongName":"possibleUpdatesPath", - "paramDescription": "the path the the association resultId orcid author list can be found", - "paramRequired": true - }, - { - "paramName":"test", - "paramLongName":"isTest", - "paramDescription": "true if it is executing a test", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json deleted file mode 100644 index 08648d61a..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"as", - "paramLongName":"allowedsemrels", - "paramDescription": "the allowed sematinc relations for propagation", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true 
- }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json deleted file mode 100644 index 1a67134a6..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 8d2c34105..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - 
spark2 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index 5f52c1658..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,371 +0,0 @@ - - - - sourcePath - the source path - - - allowedsemrels - the semantic relationships allowed for propagation - - - outputPath - the output path - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Publications - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - 
--executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - 
dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase2 - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - 
--driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${workingDir}/preparedInfo/targetOrcidAssoc - --outputPath${workingDir}/preparedInfo/mergedOrcidAssoc - - - - - - - - - - - - - - - yarn - cluster - ORCIDPropagation-Publication - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/publication - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - - - - - - - - yarn - cluster - ORCIDPropagation-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf 
spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/dataset - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - - - - - - - - yarn - cluster - ORCIDPropagation-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - - - - - - - - yarn - cluster - ORCIDPropagation-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - 
dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/software - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json deleted file mode 100644 index a70dbd6a0..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json +++ /dev/null @@ -1,33 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName":"asr", - "paramLongName":"allowedsemrels", - "paramDescription": "the types of the allowed datasources. 
Split by ;", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"pu", - "paramLongName":"potentialUpdatePath", - "paramDescription": "the path of the potential updates ", - "paramRequired": true - }, - { - "paramName":"al", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "the path of the already linked project result_set", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json deleted file mode 100644 index 7f44ba03c..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"pu", - "paramLongName":"potentialUpdatePath", - "paramDescription": "the path of the potential updates ", - "paramRequired": true - }, - { - "paramName":"al", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "the path of the already linked project result_set", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "test", - "paramLongName": 
"isTest", - "paramDescription": "true if it is a test running", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml deleted file mode 100644 index caf3c6050..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - jobTracker - yarnRM - - - - nameNode - - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml deleted file mode 100644 index 9e91c06fb..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml +++ /dev/null @@ -1,184 +0,0 @@ - - - - sourcePath - the source path - - - allowedsemrels - the allowed semantics - - - outputPath - the output path - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - PrepareProjectResultsAssociation - eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/relation - --allowedsemrels${allowedsemrels} - --hive_metastore_uris${hive_metastore_uris} - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - - - - - yarn - cluster - ProjectToResultPropagation - eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --saveGraph${saveGraph} - --hive_metastore_uris${hive_metastore_uris} - --outputPath${outputPath}/relation - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json deleted file mode 100644 index 0db8085d1..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": true - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json deleted file mode 100644 index 3601db7ac..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json +++ /dev/null @@ -1,33 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "bu", - "paramLongName": "baseURL", - "paramDescription": "the base URL to the community API to use", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - 
spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml deleted file mode 100644 index dfa762ac6..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ /dev/null @@ -1,147 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - baseURL - the community API base URL - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - Prepare-Community-Result-Organization - eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/preparedInfo/resultCommunityList - --hive_metastore_uris${hive_metastore_uris} - --baseURL${baseURL} - - - - - - - - yarn - cluster - community2resultfromorganization - eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/ - --outputPath${outputPath}/ - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json deleted file mode 100644 index 0db8085d1..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": 
"out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": true - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json deleted file mode 100644 index cbc01c2d5..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "bu", - "paramLongName": "baseURL", - "paramDescription": "the path used to store temporary output files", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml deleted file mode 100644 
index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml deleted file mode 100644 index 21cc2d887..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml +++ /dev/null @@ -1,144 +0,0 @@ - - - - sourcePath - the source path - - - - outputPath - the output path - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - 
${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - Prepare-Community-Result-Organization - eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/preparedInfo/resultCommunityList - --production${production} - - - - - - - - yarn - cluster - community2resultfromproject - eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/ - --outputPath${outputPath}/ - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json 
deleted file mode 100644 index a40ce375e..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json +++ /dev/null @@ -1,52 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": true - }, - { - "paramName":"test", - "paramLongName":"isTest", - "paramDescription": "true if it is executing a test", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json deleted file mode 100644 index 3ba3c8e9c..000000000 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json deleted file mode 100644 index 271db10bb..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "paramName":"bu", - "paramLongName":"baseURL", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"as", - "paramLongName":"allowedsemrels", - "paramDescription": "the allowed semantic relations for propagation", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": 
"out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index 916eb8b7c..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,366 +0,0 @@ - - - - sourcePath - 
the source path - - - allowedsemrels - the semantic relationships allowed for propagation - - - baseURL - the baseurl for the comminity APIs - - - outputPath - the output path - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-Publications - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --baseURL${baseURL} - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-Dataset - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - 
--executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-ORP - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-Software - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - 
--driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - yarn - cluster - ResultToCommunityEmRelPropagation-PreparePhase2 - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${workingDir}/preparedInfo/targetCommunityAssoc - --outputPath${workingDir}/preparedInfo/mergedCommunityAssoc - - - - - - - - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-Publication - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/publication - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - --saveGraph${saveGraph} - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-Dataset - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/dataset - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - --saveGraph${saveGraph} - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-ORP - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - --saveGraph${saveGraph} - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-Software - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/software - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - --saveGraph${saveGraph} - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json deleted file mode 100644 index 5fe92cff1..000000000 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - },{ - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "institutional repositories that should not be considered for the propagation", - "paramRequired": false -} -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json deleted file mode 100644 index 3f4b1d151..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - - { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the working path", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - },{ - "paramName": "bl", - "paramLongName": "blacklist", - "paramDescription": "institutional repositories that should not be considered for the propagation", - "paramRequired": false -} -] \ No 
newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json deleted file mode 100644 index d2b076c82..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json +++ /dev/null @@ -1,56 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"dop", - "paramLongName":"datasourceOrganizationPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"alp", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "path where to store/find already linked results and organizations", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "test", - "paramLongName": "isTest", - "paramDescription": "true if it is a test running", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - 
"paramDescription": "the path used to store temporary output files", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml deleted file mode 100644 index edfff8817..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ /dev/null @@ -1,277 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - sets the outputPath - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - 
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - PrepareResultOrganizationAssociation - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --blacklist${blacklist} - - - - - - - - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForPublications - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} 
- --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/publication - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForDataset - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/dataset - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForORP - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - 
- --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/otherresearchproduct - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForSoftware - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/software - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - - - - - - - - - - \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 93e9e0ab1..4cb759343 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,12 +1,12 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=AffiliationSemanticRelation +resumeFrom=CountryPropagation allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo -datasourceWhitelistForCountryPropagation=10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14 -allowedtypes=pubsrepository::institutional +datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48 +#allowedtypes=pubsrepository::institutional 
+allowedtypes=Institutional outputPath=/tmp/miriam/enrichment_one_step -organizationtoresultcommunitymap={"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|ukri________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|ukri________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|ukri________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|ukri________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], 
"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|ukri________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|ukri________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"], 
"20|openorgs____::d11f981828c485cd23d93f7f24f24db1":["eut"], "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a":["eut"], "20|openorgs____::526468206bca24c1c90da6a312295cf4":["eut"], "20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7":["eut"], "20|openorgs____::55a1f889758964b77682904218fdb298":["eut"], "20|openorgs____::530092b6970d60a5329beb9f39e8d7d4":["eut"], "20|openorgs____::aadafa39392b3e200102596a3a4aad9d":["eut"], "20|openorgs____::c3fe999c74fad308132b8a5971367dce":["eut"], "20|openorgs____::1624ff7c01bb641b91f4518539a0c28a":["aurora"], "20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2":["aurora"], "20|openorgs____::818b75030e0e40612d69e049843ede7e":["aurora"], "20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92":["aurora"], "20|openorgs____::ed47496b44722f0e9d7b98898189be0d":["aurora"], "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953":["aurora"], "20|openorgs____::eb391317ed0dc684aa81ac16265de041":["aurora"], "20|openorgs____::f7cfcc98245e22c7d6e321cde930e746":["aurora"], "20|openorgs____::f33179d3306ba2599f7a898b056b604f":["aurora"], "20|pending_org_::75c41e6dd18466709ef359323d96fa05":["aurora"]} pathMap ={"author":"$['author'][*]['fullname']", \ "title":"$['title'][*]['value']",\ "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index de054b962..8e91707b6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -29,10 +29,6 @@ outputPath the output path - - organizationtoresultcommunitymap - organization community map - pathMap the json path associated to each selection field @@ -315,7 +311,7 @@ allowedtypes - ${allowedtupes} + ${allowedtypes} diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index 307997d4c..6c5163448 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -26,12 +26,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 1fbaeb5d5..933bab7e0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -25,12 +25,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml index dbb22b994..05824d209 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -18,12 +18,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index 93a2f98be..f0db9c777 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -22,13 +22,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + + + + + + + + yarn diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index 8aec530cc..6aeffb457 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -21,12 +21,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + yarn @@ -75,9 +84,9 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList + --preparedInfoPath${workingDir}/communityorganization/preparedInfo/resultCommunityList --sourcePath${sourcePath}/ - --outputPath${workingDir}/resulttocommunityfromorganization/ + --outputPath${workingDir}/communityorganization/resulttocommunityfromorganization/ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml index 90ed2e0b6..dd845064b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -21,12 +21,19 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml index be88c45bd..773c7fba7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -18,13 +18,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + + + + + + + @@ -41,8 +48,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -70,8 +79,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + 
--executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -99,8 +110,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -128,8 +141,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -159,8 +174,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -191,8 +208,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - 
--executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -220,8 +239,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -249,8 +270,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -278,8 +301,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -295,10 +320,11 @@ --outputPath${workingDir}/communitysemrel/software - + + diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index dadea2d28..e963453da 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -21,12 +21,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + @@ -79,7 +88,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/publication - --outputPath${workingDir}/affiliationinstrepo/publication/relation + --outputPath${workingDir}/affiliationInstRepo/publication/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -108,7 +117,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/dataset - --outputPath${workingDir}/affiliationinstrepo/dataset/relation + --outputPath${workingDir}/affiliationInstRepo/dataset/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -137,7 +146,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/otherresearchproduct - --outputPath${workingDir}/affiliationinstrepo/otherresearchproduct/relation + --outputPath${workingDir}/affiliationInstRepo/otherresearchproduct/relation 
--datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -166,7 +175,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/software - --outputPath${workingDir}/affiliationinstrepo/software/relation + --outputPath${workingDir}/affiliationInstRepo/software/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -197,7 +206,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --outputPath${sourcePath}/relation - --sourcePath${workingDir}/affiliationinstrepo/ + --sourcePath${workingDir}/affiliationInstRepo/ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - 
sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index 7918df120..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,97 +0,0 @@ - - - - sourcePath - the source path - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn - cluster - PrepareResultOrganizationAssociation - eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --graphPath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath - --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath - --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath - --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation - - - - - - - - yarn - cluster - resultToOrganizationFromSemRel - 
eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=3840 - - --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation - --outputPath${sourcePath} - --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath - --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath - --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath - --hive_metastore_uris${hive_metastore_uris} - --workingDir${workingDir}/affiliationSemanticRelation/working - --iterations${iterations} - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh index 35220bd8c..9877fe7de 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh @@ -1,3 +1,3 @@ #!/bin/bash curl -LSs $1 | hdfs dfs -put - $2/$3 -curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt \ No newline at end of file +#curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt \ No newline at end of 
file From 5011c4d11a4c3884c99d784ed31a336ba89f8bfc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Dec 2023 15:57:26 +0100 Subject: [PATCH 11/57] refactoring after compiletion --- .../provision/IndexRecordTransformerTest.java | 2 +- .../dhp/oa/provision/XmlIndexingJobTest.java | 29 +++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e07ba1b4e..e72883055 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -82,7 +82,7 @@ public class IndexRecordTransformerTest { void testPeerReviewed() throws IOException, TransformerException { final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, - XmlConverterJob.schemaLocation); + XmlConverterJob.schemaLocation); final Publication p = load("publication.json", Publication.class); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index b62acbac3..a3a140cf6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.io.StringReader; import java.net.URI; @@ -32,8 +34,6 @@ import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import 
eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import static org.junit.jupiter.api.Assertions.assertEquals; - @ExtendWith(MockitoExtension.class) public class XmlIndexingJobTest extends SolrTest { @@ -110,34 +110,33 @@ public class XmlIndexingJobTest extends SolrTest { QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*")); assertEquals( - nRecord, rsp.getResults().getNumFound(), - "the number of indexed records should be equal to the number of input records"); - + nRecord, rsp.getResults().getNumFound(), + "the number of indexed records should be equal to the number of input records"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isgreen = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isgreen = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having openaccesscolor = bronze"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having openaccesscolor = bronze"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isindiamondjournal = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isindiamondjournal = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having publiclyfunded = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having publiclyfunded = true"); rsp = 
miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having peerreviewed = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having peerreviewed = true"); } @Test From 62104790ae63d08946f0e340d67f3182cb469b8d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 Dec 2023 12:26:19 +0100 Subject: [PATCH 12/57] added metaresourcetype to the result hive DB view --- .../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 149c326fa..748f77b27 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,10 +1,10 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from 
${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, 
metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.otherresearchproduct o; From 3afd4aa57bb107e35f71108c64c45ada698cf8a7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:27:30 +0100 Subject: [PATCH 13/57] adjustments for country propagation --- .../PrepareDatasourceCountryAssociation.java | 5 ++- .../PrepareResultCountrySet.java | 2 +- .../SparkCountryPropagationJob.java | 2 +- .../PrepareInfo.java | 2 +- .../input_countrypropagation_parameters.json | 32 ++++++++++++++++ .../input_prepareassoc_parameters.json | 32 ++++++++++++++++ ...input_prepareresultcountry_parameters.json | 38 +++++++++++++++++++ .../countrypropagation/oozie_app/workflow.xml | 35 ++++++++++------- 8 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 430c26592..a016509e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -45,7 +45,7 @@ public class 
PrepareDatasourceCountryAssociation { .toString( PrepareDatasourceCountryAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -90,7 +90,8 @@ public class PrepareDatasourceCountryAssociation { (FilterFunction) ds -> !ds.getDataInfo().getDeletedbyinference() && Optional.ofNullable(ds.getDatasourcetype()).isPresent() && Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() && - (allowedtypes.contains(ds.getJurisdiction().getClassid()) || + ((Optional.ofNullable(ds.getJurisdiction()).isPresent() && + allowedtypes.contains(ds.getJurisdiction().getClassid())) || whitelist.contains(ds.getId()))); // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 184d24751..884aa0e47 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -32,7 +32,7 @@ public class PrepareResultCountrySet { .toString( PrepareResultCountrySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 17247f812..92930c18b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -35,7 +35,7 @@ public class SparkCountryPropagationJob { .toString( SparkCountryPropagationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java index 8d3432f06..bdfdde13b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java @@ -60,7 +60,7 @@ public class PrepareInfo implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json")); + "/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json new file mode 
100644 index 000000000..d3cde8b74 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json new file mode 100644 index 000000000..a00105f2b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "whitelist", + "paramDescription": "the datasource having a type different from the allowed ones but that we 
want to add anyway", + "paramRequired": true + }, + { + "paramName": "at", + "paramLongName": "allowedtypes", + "paramDescription": "the allowed datasource types for country propagation", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json new file mode 100644 index 000000000..18163d1f9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"out", + "paramLongName":"outputPath", + "paramDescription": "the output path", + "paramRequired": true + }, + { + "paramName":"w", + "paramLongName":"workingPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 933bab7e0..81d6dc3dc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -61,7 +61,7 @@ --sourcePath${sourcePath} --whitelist${whitelist} --allowedtypes${allowedtypes} - --workingPath${workingDir}/country + --outputPath${workingDir}/preparedInfo @@ -95,8 +95,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --workingPath${workingDir}/country + --outputPath${workingDir}/publication + --workingPath${workingDir}/workingP --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --preparedInfoPath${workingDir}/preparedInfo @@ -123,8 +125,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --workingPath${workingDir}/country + --outputPath${workingDir}/dataset + --workingPath${workingDir}/workingD --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --preparedInfoPath${workingDir}/preparedInfo @@ -151,8 +155,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --workingPath${workingDir}/country + --outputPath${workingDir}/otherresearchproduct + --workingPath${workingDir}/workingO --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --preparedInfoPath${workingDir}/preparedInfo @@ -179,14 +185,16 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --workingPath${workingDir}/country + --outputPath${workingDir}/software + --workingPath${workingDir}/workingS --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --preparedInfoPath${workingDir}/preparedInfo - + @@ -216,9 
+224,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - + --outputPath${workingDir}/country/publication @@ -245,9 +253,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - + --outputPath${workingDir}/country/dataset @@ -274,9 +282,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - + --outputPath${workingDir}/country/otherresearchproduct @@ -303,8 +311,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/country/software From b06aea0adfe716fede41a6fd38e847dc90dd4692 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:35:37 +0100 Subject: [PATCH 14/57] adding the bulkTag parameter file in the folder for the oozie workflow for bulkTagging. 
Changes the path in the class --- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 2 +- .../bulktag/input_bulkTag_parameters.json | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 51307ccd1..e20fcb081 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -45,7 +45,7 @@ public class SparkBulkTagJob { .toString( SparkBulkTagJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json new file mode 100644 index 000000000..ce1a8ecab --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "pm", + "paramLongName":"pathMap", + "paramDescription": "the json path associated to each selection field", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + 
"paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "tg", + "paramLongName": "taggingConf", + "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", + "paramRequired": false + }, + { + "paramName": "bu", + "paramLongName": "baseURL", + "paramDescription": "this parameter is to specify the api to be queried (beta or production)", + "paramRequired": false + } +] \ No newline at end of file From 89f269c7f4b63070358724213b5d39fac0678916 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:37:50 +0100 Subject: [PATCH 15/57] changed the path to the parameter file in the class for entitytoorganization propagation --- .../SparkEntityToOrganizationFromSemRel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java index 87c0ec2b9..4e30a6d6a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java @@ -39,7 +39,7 @@ public class SparkEntityToOrganizationFromSemRel implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json")); + "/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json")); final ArgumentApplicationParser 
parser = new ArgumentApplicationParser(jsonConfiguration); From 009730b3d1616fa3337cad380b9ff8e55641c9a5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:42:09 +0100 Subject: [PATCH 16/57] added properties file in the forlder for the workflow of orcid propagation. Changes the path in the classes implementing the propagationchanged the path to the parameter file in the class for entitytoorganization propagation --- .../PrepareResultOrcidAssociationStep1.java | 2 +- .../PrepareResultOrcidAssociationStep2.java | 2 +- .../SparkOrcidToResultFromSemRelJob.java | 2 +- .../input_orcidtoresult_parameters.json | 44 +++++++++++++++++++ ...input_prepareorcidtoresult_parameters.json | 38 ++++++++++++++++ ...nput_prepareorcidtoresult_parameters2.json | 20 +++++++++ 6 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 95b870292..bc72a2ae1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -31,7 +31,7 @@ public class PrepareResultOrcidAssociationStep1 { .toString( PrepareResultOrcidAssociationStep1.class 
.getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConf); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index c60012a74..46894d0e1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -29,7 +29,7 @@ public class PrepareResultOrcidAssociationStep2 { .toString( PrepareResultOrcidAssociationStep2.class .getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 5f9260e5d..c5d632658 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -36,7 +36,7 @@ public class SparkOrcidToResultFromSemRelJob { .toString( SparkOrcidToResultFromSemRelJob.class .getResourceAsStream( -
"/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json new file mode 100644 index 000000000..3cbaa23bb --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -0,0 +1,44 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName":"pu", + "paramLongName":"possibleUpdatesPath", + "paramDescription": "the path where the association resultId orcid author list can be found", + "paramRequired": true + }, + { + "paramName":"test", + "paramLongName":"isTest", + "paramDescription": "true if it is executing a test", + "paramRequired": false + } +] \ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json new file mode 100644 index 000000000..08648d61a --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + { + "paramName":"as", + "paramLongName":"allowedsemrels", + "paramDescription": "the allowed semantic relations for propagation", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json new file mode 100644 index 000000000..1a67134a6 --- /dev/null +++
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] \ No newline at end of file From f2352e8a78017f26f297833546e1a0853c5a89b7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:43:34 +0100 Subject: [PATCH 17/57] changed in the classes the path for the property files for the propagation of community from project --- .../resulttocommunityfromproject/PrepareResultCommunitySet.java | 2 +- .../SparkResultToCommunityFromProject.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index 467e11a96..512dfa9be 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -38,7 +38,7 @@ public class PrepareResultCommunitySet { .toString( PrepareResultCommunitySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json")); final 
ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 229ac7e32..dde534061 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -44,7 +44,7 @@ public class SparkResultToCommunityFromProject implements Serializable { .toString( SparkResultToCommunityFromProject.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); From 2f7b9ad815358857dd14656ae1e4b160e7721662 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:46:15 +0100 Subject: [PATCH 18/57] added properties file in the forlder for the workflow of project to result propagation. 
Changes the path in the classes implementing the propagation --- .../PrepareProjectResultsAssociation.java | 2 +- .../SparkResultToProjectThroughSemRelJob.java | 2 +- ...put_prepareprojecttoresult_parameters.json | 33 ++++++++++++++ .../input_projecttoresult_parameters.json | 44 +++++++++++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index ac61e26f9..8f4e2ad9a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -28,7 +28,7 @@ public class PrepareProjectResultsAssociation { .toString( PrepareProjectResultsAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1ec521af1..e7518673d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -33,7 +33,7 @@ public class SparkResultToProjectThroughSemRelJob { .toString( SparkResultToProjectThroughSemRelJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json new file mode 100644 index 000000000..a70dbd6a0 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json @@ -0,0 +1,33 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName":"asr", + "paramLongName":"allowedsemrels", + "paramDescription": "the types of the allowed datasources. 
Split by ;", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"pu", + "paramLongName":"potentialUpdatePath", + "paramDescription": "the path of the potential updates ", + "paramRequired": true + }, + { + "paramName":"al", + "paramLongName":"alreadyLinkedPath", + "paramDescription": "the path of the already linked project result_set", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json new file mode 100644 index 000000000..7f44ba03c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json @@ -0,0 +1,44 @@ +[ + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"pu", + "paramLongName":"potentialUpdatePath", + "paramDescription": "the path of the potential updates ", + "paramRequired": true + }, + { + "paramName":"al", + "paramLongName":"alreadyLinkedPath", + "paramDescription": "the path of the already linked project result_set", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { +
"paramName": "test", + "paramLongName": "isTest", + "paramDescription": "true if it is a test running", + "paramRequired": false + } +] \ No newline at end of file From 2f3b5a133d4ddfc4ed6a38366c927330d2c25b08 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 13:56:40 +0100 Subject: [PATCH 19/57] added properties file in the forlder for the workflow of result to community from organization propagation. Changes the path in the classes implementing the propagation --- .../PrepareResultCommunitySet.java | 2 +- ...kResultToCommunityFromOrganizationJob.java | 2 +- .../input_communitytoresult_parameters.json | 28 ++++++++++++++++ ...t_preparecommunitytoresult_parameters.json | 33 +++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 54fa60168..be31cd46c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -34,7 +34,7 @@ public class PrepareResultCommunitySet { .toString( PrepareResultCommunitySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); + 
"/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index adb7feef7..cc87b80e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -36,7 +36,7 @@ public class SparkResultToCommunityFromOrganizationJob { .toString( SparkResultToCommunityFromOrganizationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json new file mode 100644 index 000000000..0db8085d1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json @@ -0,0 +1,28 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + 
"paramRequired": true + }, + + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json new file mode 100644 index 000000000..3601db7ac --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json @@ -0,0 +1,33 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "bu", + "paramLongName": "baseURL", + "paramDescription": "the base URL to the community API to use", + "paramRequired": false + } + +] \ No newline at end of file From 
9f966b59d446ba83d9dd002dddaf1d9585a3b037 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 14:11:47 +0100 Subject: [PATCH 20/57] added properties file in the folder for the workflow of result to community from semrel propagation. Changes the path in the classes implementing the propagation --- .../PrepareResultCommunitySetStep1.java | 2 +- .../PrepareResultCommunitySetStep2.java | 2 +- ...parkResultToCommunityThroughSemRelJob.java | 2 +- .../input_communitytoresult_parameters.json | 52 +++++++++++++++++++ ..._preparecommunitytoresult2_parameters.json | 20 +++++++ ...t_preparecommunitytoresult_parameters.json | 44 ++++++++++++++++ 6 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 40c074a6e..aede9ef05 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -61,7 +61,7 @@ public class PrepareResultCommunitySetStep1 { .toString( PrepareResultCommunitySetStep1.class .getResourceAsStream( - 
"/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 0ddb19a1a..a53d3dfe3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -31,7 +31,7 @@ public class PrepareResultCommunitySetStep2 { .toString( PrepareResultCommunitySetStep2.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index a10737849..4929c7582 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -33,7 +33,7 @@ public class SparkResultToCommunityThroughSemRelJob { .toString( SparkResultToCommunityThroughSemRelJob.class 
.getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json new file mode 100644 index 000000000..a40ce375e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json @@ -0,0 +1,52 @@ +[ + + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + }, + { + 
"paramName":"test", + "paramLongName":"isTest", + "paramDescription": "true if it is executing a test", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json new file mode 100644 index 000000000..3ba3c8e9c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json new file mode 100644 index 000000000..c6389ec8d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json @@ -0,0 +1,44 @@ +[ + { + "paramName":"bu", + "paramLongName":"baseURL", + "paramDescription": "URL of the isLookUp Service", + "paramRequired": true + }, + { + "paramName":"s", + 
"paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"as", + "paramLongName":"allowedsemrels", + "paramDescription": "the allowed semantic relations for propagation", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] \ No newline at end of file From cb14470ba6779bd6f5dea3e1b937512295c0854a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 14:50:05 +0100 Subject: [PATCH 21/57] added properties file in the folder for the workflow of result to organization from inst repo propagation. 
Changes the path in the classes implementing the propagation --- .../AppendNewRelations.java | 2 +- .../PrepareResultInstRepoAssociation.java | 2 +- ...arkResultToOrganizationFromIstRepoJob.java | 2 +- .../input_newrelation_parameters.json | 20 +++++++ .../input_prepareresultorg_parameters.json | 32 +++++++++++ ...sulaffiliationfrominstrepo_parameters.json | 56 +++++++++++++++++++ 6 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java index 636c14b65..11e942142 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java @@ -30,7 +30,7 @@ public class AppendNewRelations implements Serializable { .toString( AppendNewRelations.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index deec6fedc..57488bd20 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -40,7 +40,7 @@ public class PrepareResultInstRepoAssociation { .toString( PrepareResultInstRepoAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index bbad20e2d..c8862b10c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -47,7 +47,7 @@ public class SparkResultToOrganizationFromIstRepoJob { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json")); + 
"/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json new file mode 100644 index 000000000..5fe92cff1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + },{ + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "institutional repositories that should not be considered for the propagation", + "paramRequired": false +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json new file mode 100644 index 000000000..3f4b1d151 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": 
true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + },{ + "paramName": "bl", + "paramLongName": "blacklist", + "paramDescription": "institutional repositories that should not be considered for the propagation", + "paramRequired": false +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json new file mode 100644 index 000000000..d2b076c82 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json @@ -0,0 +1,56 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"dop", + "paramLongName":"datasourceOrganizationPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"alp", + 
"paramLongName":"alreadyLinkedPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + }, + { + "paramName": "test", + "paramLongName": "isTest", + "paramDescription": "true if it is a test running", + "paramRequired": false + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + } +] \ No newline at end of file From 02636e802c26c284efa1415d168815c5b23ed655 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Mon, 2 Oct 2023 09:25:12 +0200 Subject: [PATCH 22/57] SparkCreateSimRels: - Create dedup blocks from the complete queue of records matching cluster key instead of truncating the results - Clean titles once before clustering and similarity comparisons - Added support for filtered fields in model - Added support for sorting List fields in model - Added new JSONListClustering and numAuthorsTitleSuffixPrefixChain clustering functions - Added new maxLengthMatch comparator function - Use reduced complexity Levenshtein with threshold in levensteinTitle - Use reduced complexity AuthorsMatch with threshold early-quit - Use incremental Connected Component to decrease comparisons in similarity match in BlockProcessor - Use new clusterings configuration in Dedup tests SparkWhitelistSimRels: use left semi join for clarity and performance SparkCreateMergeRels: - Use new connected component algorithm that converges faster than the algorithm provided by Spark GraphX - Refactored to use Windowing sorting rather than groupBy to reduce memory pressure - Use historical pivot 
table to generate singleton rels, merged rels and keep continuity with dedupIds used in the past - Comparator for pivot record selection now uses "tomorrow" as filler for missing or incorrect date instead of "2000-01-01" - Changed generation of ids of type dedup_wf_001 to avoid collisions DedupRecordFactory: use reduceGroups instead of mapGroups to decrease memory pressure --- .../AbstractClusteringFunction.java | 23 +- .../eu/dnetlib/pace/clustering/Acronyms.java | 2 +- .../pace/clustering/ClusteringFunction.java | 2 +- .../pace/clustering/ImmutableFieldValue.java | 2 +- .../pace/clustering/JSONListClustering.java | 69 ++++ .../pace/clustering/KeywordsClustering.java | 12 +- .../pace/clustering/LastNameFirstInitial.java | 7 +- .../pace/clustering/LowercaseClustering.java | 2 +- .../dnetlib/pace/clustering/NgramPairs.java | 4 +- .../eu/dnetlib/pace/clustering/Ngrams.java | 4 +- .../NumAuthorsTitleSuffixPrefixChain.java | 113 ++++++ .../pace/clustering/PersonClustering.java | 6 +- .../dnetlib/pace/clustering/PersonHash.java | 2 +- .../clustering/RandomClusteringFunction.java | 2 +- .../pace/clustering/SortedNgramPairs.java | 7 +- .../clustering/SpaceTrimmingFieldValue.java | 4 +- .../dnetlib/pace/clustering/SuffixPrefix.java | 2 +- .../pace/clustering/UrlClustering.java | 14 +- .../WordsStatsSuffixPrefixChain.java | 2 +- .../pace/clustering/WordsSuffixPrefix.java | 2 +- .../pace/common/AbstractPaceFunctions.java | 66 ++-- .../eu/dnetlib/pace/model/ClusteringDef.java | 6 +- .../java/eu/dnetlib/pace/model/FieldDef.java | 35 ++ .../eu/dnetlib/pace/model/SparkDeduper.scala | 38 +- .../eu/dnetlib/pace/model/SparkModel.scala | 46 ++- .../eu/dnetlib/pace/tree/AuthorsMatch.java | 25 +- .../dnetlib/pace/tree/InstanceTypeMatch.java | 2 +- .../eu/dnetlib/pace/tree/LevensteinTitle.java | 20 +- .../eu/dnetlib/pace/tree/MaxLengthMatch.java | 29 ++ .../pace/tree/support/AbstractComparator.java | 10 + .../eu/dnetlib/pace/util/BlockProcessor.java | 24 +- 
.../util/IncrementalConnectedComponents.java | 50 +++ .../eu/dnetlib/pace/util/MapDocumentUtil.java | 2 + .../eu/dnetlib/pace/util/PaceResolver.java | 2 +- .../clustering/ClusteringFunctionTest.java | 40 +-- .../IncrementalConnectedComponentsTest.java | 40 +++ .../dhp/oa/dedup/AbstractSparkAction.java | 4 + .../dhp/oa/dedup/DedupRecordFactory.java | 134 +++---- .../eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 21 +- .../dhp/oa/dedup/SparkCreateMergeRels.java | 332 ++++++++++------- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 16 +- .../oa/dedup/graph/ConnectedComponent.java | 100 ------ .../dhp/oa/dedup/graph/GraphProcessor.scala | 37 -- .../dhp/oa/dedup/model/Identifier.java | 18 +- .../dhp/oa/dedup/createCC_parameters.json | 12 + .../dedup/scan/oozie_app/config-default.xml | 4 + .../dhp/oa/dedup/scan/oozie_app/workflow.xml | 2 + .../kwartile/lib/cc/ConnectedComponent.scala | 335 ++++++++++++++++++ .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 160 +++++++-- .../dnetlib/dhp/dedup/conf/ds.curr.conf.json | 3 +- .../dnetlib/dhp/dedup/conf/orp.curr.conf.json | 3 +- .../dnetlib/dhp/dedup/conf/pub.curr.conf.json | 49 ++- .../dnetlib/dhp/dedup/conf/sw.curr.conf.json | 3 +- .../dedup/pivot_history/pivot_history.json | 1 + pom.xml | 20 ++ 55 files changed, 1437 insertions(+), 533 deletions(-) create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java create mode 100644 dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java delete mode 100644 
dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java index 3da8eb490..e971ec5bb 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java @@ -14,9 +14,9 @@ import eu.dnetlib.pace.config.Config; public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction { - protected Map params; + protected Map params; - public AbstractClusteringFunction(final Map params) { + public AbstractClusteringFunction(final Map params) { this.params = params; } @@ -27,7 +27,7 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::normalize) + .map(s -> normalize(s)) .map(s -> filterAllStopWords(s)) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) @@ -36,11 +36,24 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i .collect(Collectors.toCollection(HashSet::new)); } - public Map getParams() { + public Map getParams() { return params; } protected Integer param(String name) { - return params.get(name); + Object val = params.get(name); + if (val == null) + return null; + if (val instanceof Number) { + return ((Number) val).intValue(); + } + return Integer.parseInt(val.toString()); + } + + protected int paramOrDefault(String name, int i) { + Integer res = param(name); + if (res == null) + res = i; + 
return res; } } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java index 9072fbb4b..b5db27106 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java @@ -13,7 +13,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("acronyms") public class Acronyms extends AbstractClusteringFunction { - public Acronyms(Map params) { + public Acronyms(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java index 8b7852418..269de867d 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java @@ -11,6 +11,6 @@ public interface ClusteringFunction { public Collection apply(Config config, List fields); - public Map getParams(); + public Map getParams(); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java index bc8844aee..cbfcde266 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("immutablefieldvalue") public class ImmutableFieldValue extends AbstractClusteringFunction { - public ImmutableFieldValue(final Map params) { + public ImmutableFieldValue(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java new file mode 100644 index 
000000000..e00092bd0 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java @@ -0,0 +1,69 @@ + +package eu.dnetlib.pace.clustering; + +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; + +import com.jayway.jsonpath.Configuration; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.Option; + +import eu.dnetlib.pace.common.AbstractPaceFunctions; +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.util.MapDocumentUtil; + +@ClusteringClass("jsonlistclustering") +public class JSONListClustering extends AbstractPaceFunctions implements ClusteringFunction { + + private Map params; + + public JSONListClustering(Map params) { + this.params = params; + } + + @Override + public Map getParams() { + return params; + } + + @Override + public Collection apply(Config conf, List fields) { + return fields + .stream() + .filter(f -> !f.isEmpty()) + .map(s -> doApply(conf, s)) + .filter(StringUtils::isNotBlank) + .collect(Collectors.toCollection(HashSet::new)); + } + + private String doApply(Config conf, String json) { + StringBuilder st = new StringBuilder(); // to build the string used for comparisons basing on the jpath into + // parameters + final DocumentContext documentContext = JsonPath + .using(Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS)) + .parse(json); + + // for each path in the param list + for (String key : params.keySet().stream().filter(k -> k.contains("jpath")).collect(Collectors.toList())) { + String path = params.get(key).toString(); + String value = MapDocumentUtil.getJPathString(path, documentContext); + if (value == null || value.isEmpty()) + value = ""; + st.append(value); + st.append(" "); + } + + st.setLength(st.length() - 1); + + if (StringUtils.isBlank(st)) { + return "1"; + } + 
return st.toString(); + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java index 38299adb4..fdd8d1fb1 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java @@ -11,7 +11,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("keywordsclustering") public class KeywordsClustering extends AbstractClusteringFunction { - public KeywordsClustering(Map params) { + public KeywordsClustering(Map params) { super(params); } @@ -19,8 +19,8 @@ public class KeywordsClustering extends AbstractClusteringFunction { protected Collection doApply(final Config conf, String s) { // takes city codes and keywords codes without duplicates - Set keywords = getKeywords(s, conf.translationMap(), params.getOrDefault("windowSize", 4)); - Set cities = getCities(s, params.getOrDefault("windowSize", 4)); + Set keywords = getKeywords(s, conf.translationMap(), paramOrDefault("windowSize", 4)); + Set cities = getCities(s, paramOrDefault("windowSize", 4)); // list of combination to return as result final Collection combinations = new LinkedHashSet(); @@ -28,7 +28,7 @@ public class KeywordsClustering extends AbstractClusteringFunction { for (String keyword : keywordsToCodes(keywords, conf.translationMap())) { for (String city : citiesToCodes(cities)) { combinations.add(keyword + "-" + city); - if (combinations.size() >= params.getOrDefault("max", 2)) { + if (combinations.size() >= paramOrDefault("max", 2)) { return combinations; } } @@ -42,8 +42,8 @@ public class KeywordsClustering extends AbstractClusteringFunction { return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::cleanup) - .map(this::normalize) + .map(KeywordsClustering::cleanup) + .map(KeywordsClustering::normalize) .map(s -> filterAllStopWords(s)) .map(s -> doApply(conf, s)) .map(c 
-> filterBlacklisted(c, ngramBlacklist)) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java index 5a385961a..9692f5762 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java @@ -16,7 +16,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { private boolean DEFAULT_AGGRESSIVE = true; - public LastNameFirstInitial(final Map params) { + public LastNameFirstInitial(final Map params) { super(params); } @@ -25,7 +25,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::normalize) + .map(LastNameFirstInitial::normalize) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) .flatMap(c -> c.stream()) @@ -33,8 +33,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { .collect(Collectors.toCollection(HashSet::new)); } - @Override - protected String normalize(final String s) { + public static String normalize(final String s) { return fixAliases(transliterate(nfd(unicodeNormalization(s)))) // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input // strings diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java index a3a6c4881..807f41dd5 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("lowercase") public class LowercaseClustering extends AbstractClusteringFunction { - public LowercaseClustering(final Map params) { 
+ public LowercaseClustering(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java index aa06aa408..bcc9667a8 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java @@ -12,11 +12,11 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("ngrampairs") public class NgramPairs extends Ngrams { - public NgramPairs(Map params) { + public NgramPairs(Map params) { super(params, false); } - public NgramPairs(Map params, boolean sorted) { + public NgramPairs(Map params, boolean sorted) { super(params, sorted); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java index 96c305a16..7b862c729 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java @@ -10,11 +10,11 @@ public class Ngrams extends AbstractClusteringFunction { private final boolean sorted; - public Ngrams(Map params) { + public Ngrams(Map params) { this(params, false); } - public Ngrams(Map params, boolean sorted) { + public Ngrams(Map params, boolean sorted) { super(params); this.sorted = sorted; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java new file mode 100644 index 000000000..f1d1e17b9 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java @@ -0,0 +1,113 @@ + +package eu.dnetlib.pace.clustering; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import com.google.common.base.Splitter; +import 
com.google.common.collect.Sets; + +import eu.dnetlib.pace.config.Config; + +@ClusteringClass("numAuthorsTitleSuffixPrefixChain") +public class NumAuthorsTitleSuffixPrefixChain extends AbstractClusteringFunction { + + public NumAuthorsTitleSuffixPrefixChain(Map params) { + super(params); + } + + @Override + public Collection apply(Config conf, List fields) { + + try { + int num_authors = Math.min(Integer.parseInt(fields.get(0)), 21); // SIZE threshold is 20, +1 + + if (num_authors > 0) { + return super.apply(conf, fields.subList(1, fields.size())) + .stream() + .map(s -> num_authors + "-" + s) + .collect(Collectors.toList()); + } + } catch (NumberFormatException e) { + // missing or null authors array + } + + return Collections.emptyList(); + } + + @Override + protected Collection doApply(Config conf, String s) { + return suffixPrefixChain(cleanup(s), param("mod")); + } + + private Collection suffixPrefixChain(String s, int mod) { + // create the list of words from the string (remove short words) + List wordsList = Arrays + .stream(s.split(" ")) + .filter(si -> si.length() > 3) + .collect(Collectors.toList()); + + final int words = wordsList.size(); + final int letters = s.length(); + + // create the prefix: number of words + number of letters/mod + String prefix = words / mod + "-"; + + return doSuffixPrefixChain(wordsList, prefix); + + } + + private Collection doSuffixPrefixChain(List wordsList, String prefix) { + + Set set = Sets.newLinkedHashSet(); + switch (wordsList.size()) { + case 0: + break; + case 1: + set.add(wordsList.get(0)); + break; + case 2: + set + .add( + prefix + + suffix(wordsList.get(0), 3) + + prefix(wordsList.get(1), 3)); + + set + .add( + prefix + + prefix(wordsList.get(0), 3) + + suffix(wordsList.get(1), 3)); + + break; + default: + set + .add( + prefix + + suffix(wordsList.get(0), 3) + + prefix(wordsList.get(1), 3) + + suffix(wordsList.get(2), 3)); + + set + .add( + prefix + + prefix(wordsList.get(0), 3) + + suffix(wordsList.get(1), 3) + + 
prefix(wordsList.get(2), 3)); + break; + } + + return set; + + } + + private String suffix(String s, int len) { + return s.substring(s.length() - len); + } + + private String prefix(String s, int len) { + return s.substring(0, len); + } + +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java index b4a04ce65..91b51bebb 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java @@ -17,11 +17,11 @@ import eu.dnetlib.pace.model.Person; @ClusteringClass("personClustering") public class PersonClustering extends AbstractPaceFunctions implements ClusteringFunction { - private Map params; + private Map params; private static final int MAX_TOKENS = 5; - public PersonClustering(final Map params) { + public PersonClustering(final Map params) { this.params = params; } @@ -77,7 +77,7 @@ public class PersonClustering extends AbstractPaceFunctions implements Clusterin // } @Override - public Map getParams() { + public Map getParams() { return params; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java index a3d58a9be..09a112c37 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java @@ -15,7 +15,7 @@ public class PersonHash extends AbstractClusteringFunction { private boolean DEFAULT_AGGRESSIVE = false; - public PersonHash(final Map params) { + public PersonHash(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java index 2aab926da..3733dfc74 100644 --- 
a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java @@ -8,7 +8,7 @@ import eu.dnetlib.pace.config.Config; public class RandomClusteringFunction extends AbstractClusteringFunction { - public RandomClusteringFunction(Map params) { + public RandomClusteringFunction(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java index b085ae26d..ca1b4189b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java @@ -1,7 +1,10 @@ package eu.dnetlib.pace.clustering; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; import com.google.common.base.Joiner; import com.google.common.base.Splitter; @@ -12,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("sortedngrampairs") public class SortedNgramPairs extends NgramPairs { - public SortedNgramPairs(Map params) { + public SortedNgramPairs(Map params) { super(params, false); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java index 392aecc79..048380f7e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("spacetrimmingfieldvalue") public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { - public SpaceTrimmingFieldValue(final Map params) { + public SpaceTrimmingFieldValue(final Map params) { super(params); } @@ -25,7 
+25,7 @@ public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { res .add( - StringUtils.isBlank(s) ? RandomStringUtils.random(getParams().get("randomLength")) + StringUtils.isBlank(s) ? RandomStringUtils.random(param("randomLength")) : s.toLowerCase().replaceAll("\\s+", "")); return res; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java index 2a1c023a9..b6921e9f1 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("suffixprefix") public class SuffixPrefix extends AbstractClusteringFunction { - public SuffixPrefix(Map params) { + public SuffixPrefix(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java index 5b267ad10..34f41085b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java @@ -15,12 +15,17 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("urlclustering") public class UrlClustering extends AbstractPaceFunctions implements ClusteringFunction { - protected Map params; + protected Map params; - public UrlClustering(final Map params) { + public UrlClustering(final Map params) { this.params = params; } + @Override + public Map getParams() { + return params; + } + @Override public Collection apply(final Config conf, List fields) { try { @@ -35,11 +40,6 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu } } - @Override - public Map getParams() { - return null; - } - private URL asUrl(String value) { try { return new URL(value); diff --git 
a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java index c8e02f8f0..22351cf8f 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java @@ -11,7 +11,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("wordsStatsSuffixPrefixChain") public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction { - public WordsStatsSuffixPrefixChain(Map params) { + public WordsStatsSuffixPrefixChain(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java index e606590a5..f9fef376b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("wordssuffixprefix") public class WordsSuffixPrefix extends AbstractClusteringFunction { - public WordsSuffixPrefix(Map params) { + public WordsSuffixPrefix(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index b440686de..ba7639ada 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -16,7 +16,6 @@ import org.apache.commons.lang3.StringUtils; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import 
com.ibm.icu.text.Transliterator; @@ -27,7 +26,7 @@ import eu.dnetlib.pace.clustering.NGramUtils; * * @author claudio */ -public abstract class AbstractPaceFunctions { +public class AbstractPaceFunctions { // city map to be used when translating the city names into codes private static Map cityMap = AbstractPaceFunctions @@ -62,11 +61,14 @@ public abstract class AbstractPaceFunctions { private static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})"); - protected String concat(final List l) { + private static Pattern romanNumberPattern = Pattern + .compile("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$"); + + protected static String concat(final List l) { return Joiner.on(" ").skipNulls().join(l); } - protected String cleanup(final String s) { + public static String cleanup(final String s) { final String s1 = HTML_REGEX.matcher(s).replaceAll(""); final String s2 = unicodeNormalization(s1.toLowerCase()); final String s3 = nfd(s2); @@ -82,7 +84,7 @@ public abstract class AbstractPaceFunctions { return s12; } - protected String fixXML(final String a) { + protected static String fixXML(final String a) { return a .replaceAll("–", " ") @@ -91,7 +93,7 @@ public abstract class AbstractPaceFunctions { .replaceAll("−", " "); } - protected boolean checkNumbers(final String a, final String b) { + protected static boolean checkNumbers(final String a, final String b) { final String numbersA = getNumbers(a); final String numbersB = getNumbers(b); final String romansA = getRomans(a); @@ -99,7 +101,7 @@ public abstract class AbstractPaceFunctions { return !numbersA.equals(numbersB) || !romansA.equals(romansB); } - protected String getRomans(final String s) { + protected static String getRomans(final String s) { final StringBuilder sb = new StringBuilder(); for (final String t : s.split(" ")) { sb.append(isRoman(t) ? 
t : ""); @@ -107,13 +109,12 @@ public abstract class AbstractPaceFunctions { return sb.toString(); } - protected boolean isRoman(final String s) { - return s - .replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop") - .equals("qwertyuiop"); + protected static boolean isRoman(final String s) { + Matcher m = romanNumberPattern.matcher(s); + return m.matches() && m.hitEnd(); } - protected String getNumbers(final String s) { + protected static String getNumbers(final String s) { final StringBuilder sb = new StringBuilder(); for (final String t : s.split(" ")) { sb.append(isNumber(t) ? t : ""); @@ -121,7 +122,7 @@ public abstract class AbstractPaceFunctions { return sb.toString(); } - public boolean isNumber(String strNum) { + public static boolean isNumber(String strNum) { if (strNum == null) { return false; } @@ -147,7 +148,7 @@ public abstract class AbstractPaceFunctions { } } - protected String removeSymbols(final String s) { + protected static String removeSymbols(final String s) { final StringBuilder sb = new StringBuilder(); s.chars().forEach(ch -> { @@ -157,11 +158,11 @@ public abstract class AbstractPaceFunctions { return sb.toString().replaceAll("\\s+", " "); } - protected boolean notNull(final String s) { + protected static boolean notNull(final String s) { return s != null; } - protected String normalize(final String s) { + public static String normalize(final String s) { return fixAliases(transliterate(nfd(unicodeNormalization(s)))) .toLowerCase() // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input @@ -174,16 +175,16 @@ public abstract class AbstractPaceFunctions { .trim(); } - public String nfd(final String s) { + public static String nfd(final String s) { return Normalizer.normalize(s, Normalizer.Form.NFD); } - public String utf8(final String s) { + public static String utf8(final String s) { byte[] bytes = s.getBytes(StandardCharsets.UTF_8); return new String(bytes, 
StandardCharsets.UTF_8); } - public String unicodeNormalization(final String s) { + public static String unicodeNormalization(final String s) { Matcher m = hexUnicodePattern.matcher(s); StringBuffer buf = new StringBuffer(s.length()); @@ -195,7 +196,7 @@ public abstract class AbstractPaceFunctions { return buf.toString(); } - protected String filterStopWords(final String s, final Set stopwords) { + protected static String filterStopWords(final String s, final Set stopwords) { final StringTokenizer st = new StringTokenizer(s); final StringBuilder sb = new StringBuilder(); while (st.hasMoreTokens()) { @@ -208,7 +209,7 @@ public abstract class AbstractPaceFunctions { return sb.toString().trim(); } - public String filterAllStopWords(String s) { + public static String filterAllStopWords(String s) { s = filterStopWords(s, stopwords_en); s = filterStopWords(s, stopwords_de); @@ -221,7 +222,8 @@ public abstract class AbstractPaceFunctions { return s; } - protected Collection filterBlacklisted(final Collection set, final Set ngramBlacklist) { + protected static Collection filterBlacklisted(final Collection set, + final Set ngramBlacklist) { final Set newset = Sets.newLinkedHashSet(); for (final String s : set) { if (!ngramBlacklist.contains(s)) { @@ -268,7 +270,7 @@ public abstract class AbstractPaceFunctions { return m; } - public String removeKeywords(String s, Set keywords) { + public static String removeKeywords(String s, Set keywords) { s = " " + s + " "; for (String k : keywords) { @@ -278,39 +280,39 @@ public abstract class AbstractPaceFunctions { return s.trim(); } - public double commonElementsPercentage(Set s1, Set s2) { + public static double commonElementsPercentage(Set s1, Set s2) { double longer = Math.max(s1.size(), s2.size()); return (double) s1.stream().filter(s2::contains).count() / longer; } // convert the set of keywords to codes - public Set toCodes(Set keywords, Map translationMap) { + public static Set toCodes(Set keywords, Map translationMap) { 
return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet()); } - public Set keywordsToCodes(Set keywords, Map translationMap) { + public static Set keywordsToCodes(Set keywords, Map translationMap) { return toCodes(keywords, translationMap); } - public Set citiesToCodes(Set keywords) { + public static Set citiesToCodes(Set keywords) { return toCodes(keywords, cityMap); } - protected String firstLC(final String s) { + protected static String firstLC(final String s) { return StringUtils.substring(s, 0, 1).toLowerCase(); } - protected Iterable tokens(final String s, final int maxTokens) { + protected static Iterable tokens(final String s, final int maxTokens) { return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens); } - public String normalizePid(String pid) { + public static String normalizePid(String pid) { return DOI_PREFIX.matcher(pid.toLowerCase()).replaceAll(""); } // get the list of keywords into the input string - public Set getKeywords(String s1, Map translationMap, int windowSize) { + public static Set getKeywords(String s1, Map translationMap, int windowSize) { String s = s1; @@ -340,7 +342,7 @@ public abstract class AbstractPaceFunctions { return codes; } - public Set getCities(String s1, int windowSize) { + public static Set getCities(String s1, int windowSize) { return getKeywords(s1, cityMap, windowSize); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java index d9ad81d42..5ede2c380 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java @@ -18,7 +18,7 @@ public class ClusteringDef implements Serializable { private List fields; - private Map params; + private Map params; public ClusteringDef() { } @@ -43,11 +43,11 @@ public class ClusteringDef implements Serializable { this.fields = fields; 
} - public Map getParams() { + public Map getParams() { return params; } - public void setParams(final Map params) { + public void setParams(final Map params) { this.params = params; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index f34545e6d..7ad9b7445 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -2,6 +2,7 @@ package eu.dnetlib.pace.model; import java.io.Serializable; +import java.util.HashSet; import java.util.List; import com.fasterxml.jackson.core.JsonProcessingException; @@ -36,6 +37,16 @@ public class FieldDef implements Serializable { */ private int length = -1; + private HashSet filter; + + private boolean sorted; + + public boolean isSorted() { + return sorted; + } + + private String clean; + public FieldDef() { } @@ -91,6 +102,30 @@ public class FieldDef implements Serializable { this.path = path; } + public HashSet getFilter() { + return filter; + } + + public void setFilter(HashSet filter) { + this.filter = filter; + } + + public boolean getSorted() { + return sorted; + } + + public void setSorted(boolean sorted) { + this.sorted = sorted; + } + + public String getClean() { + return clean; + } + + public void setClean(String clean) { + this.clean = clean; + } + @Override public String toString() { try { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala index b3f56bcdb..bc702b9e2 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala @@ -5,9 +5,9 @@ import eu.dnetlib.pace.util.{BlockProcessor, SparkReporter} import org.apache.spark.SparkContext import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.expressions._ -import 
org.apache.spark.sql.functions.{col, lit, udf} +import org.apache.spark.sql.functions.{col, desc, expr, lit, udf} import org.apache.spark.sql.types._ -import org.apache.spark.sql.{Column, Dataset, Row, functions} +import org.apache.spark.sql.{Column, Dataset, Row, SaveMode, functions} import java.util.function.Predicate import java.util.stream.Collectors @@ -80,6 +80,8 @@ case class SparkDeduper(conf: DedupConfig) extends Serializable { .withColumn("key", functions.explode(clusterValuesUDF(cd).apply(functions.array(inputColumns: _*)))) // Add position column having the position of the row within the set of rows having the same key value ordered by the sorting value .withColumn("position", functions.row_number().over(Window.partitionBy("key").orderBy(col(model.orderingFieldName), col(model.identifierFieldName)))) + // .withColumn("count", functions.max("position").over(Window.partitionBy("key").orderBy(col(model.orderingFieldName), col(model.identifierFieldName)).rowsBetween(Window.unboundedPreceding,Window.unboundedFollowing) )) + // .filter("count > 1") if (df_with_clustering_keys == null) df_with_clustering_keys = ds @@ -88,20 +90,44 @@ case class SparkDeduper(conf: DedupConfig) extends Serializable { } //TODO: analytics + /*df_with_clustering_keys.groupBy(col("clustering"), col("key")) + .agg(expr("max(count) AS size")) + .orderBy(desc("size")) + .show*/ val df_with_blocks = df_with_clustering_keys - // filter out rows with position exceeding the maxqueuesize parameter - .filter(col("position").leq(conf.getWf.getQueueMaxSize)) - .groupBy("clustering", "key") + // split the clustering block into smaller blocks of queuemaxsize + .groupBy(col("clustering"), col("key"), functions.floor(col("position").divide(lit(conf.getWf.getQueueMaxSize)))) .agg(functions.collect_set(functions.struct(model.schema.fieldNames.map(col): _*)).as("block")) .filter(functions.size(new Column("block")).gt(1)) + .union( + //adjacency blocks + df_with_clustering_keys + // filter out leading 
and trailing elements + .filter(col("position").gt(conf.getWf.getSlidingWindowSize/2)) + //.filter(col("position").lt(col("count").minus(conf.getWf.getSlidingWindowSize/2))) + // create small blocks of records on "the border" of maxqueuesize: getSlidingWindowSize/2 elements before and after + .filter( + col("position").mod(conf.getWf.getQueueMaxSize).lt(conf.getWf.getSlidingWindowSize/2) // slice of the start of block + || col("position").mod(conf.getWf.getQueueMaxSize).gt(conf.getWf.getQueueMaxSize - (conf.getWf.getSlidingWindowSize/2)) //slice of the end of the block + ) + .groupBy(col("clustering"), col("key"), functions.floor((col("position") + lit(conf.getWf.getSlidingWindowSize/2)).divide(lit(conf.getWf.getQueueMaxSize)))) + .agg(functions.collect_set(functions.struct(model.schema.fieldNames.map(col): _*)).as("block")) + .filter(functions.size(new Column("block")).gt(1)) + ) df_with_blocks } def clusterValuesUDF(cd: ClusteringDef) = { udf[mutable.WrappedArray[String], mutable.WrappedArray[Any]](values => { - values.flatMap(f => cd.clusteringFunction().apply(conf, Seq(f.toString).asJava).asScala) + val valueList = values.flatMap { + case a: mutable.WrappedArray[Any] => a.map(_.toString) + case s: Any => Seq(s.toString) + }.asJava; + + mutable.WrappedArray.make(cd.clusteringFunction().apply(conf, valueList).toArray()) + }) } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala index aa997c6e9..aa04188da 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala @@ -1,13 +1,16 @@ package eu.dnetlib.pace.model import com.jayway.jsonpath.{Configuration, JsonPath} +import eu.dnetlib.pace.common.AbstractPaceFunctions import eu.dnetlib.pace.config.{DedupConfig, Type} import eu.dnetlib.pace.util.MapDocumentUtil +import org.apache.commons.lang3.StringUtils import 
org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} import org.apache.spark.sql.{Dataset, Row} +import java.util.Locale import java.util.regex.Pattern import scala.collection.JavaConverters._ @@ -60,7 +63,7 @@ case class SparkModel(conf: DedupConfig) { values(identityFieldPosition) = MapDocumentUtil.getJPathString(conf.getWf.getIdPath, documentContext) schema.fieldNames.zipWithIndex.foldLeft(values) { - case ((res, (fname, index))) => { + case ((res, (fname, index))) => val fdef = conf.getPace.getModelMap.get(fname) if (fdef != null) { @@ -96,13 +99,52 @@ case class SparkModel(conf: DedupConfig) { case Type.DoubleArray => MapDocumentUtil.getJPathArray(fdef.getPath, json) } + + val filter = fdef.getFilter + + if (StringUtils.isNotBlank(fdef.getClean)) { + res(index) = res(index) match { + case x: Seq[String] => x.map(clean(_, fdef.getClean)).toSeq + case _ => clean(res(index).toString, fdef.getClean) + } + } + + if (filter != null && !filter.isEmpty) { + res(index) = res(index) match { + case x: String if filter.contains(x.toLowerCase(Locale.ROOT)) => null + case x: Seq[String] => x.filter(s => !filter.contains(s.toLowerCase(Locale.ROOT))).toSeq + case _ => res(index) + } + } + + if (fdef.getSorted) { + res(index) = res(index) match { + case x: Seq[String] => x.sorted.toSeq + case _ => res(index) + } + } } res - } } new GenericRowWithSchema(values, schema) } + + def clean(value: String, cleantype: String) : String = { + val res = cleantype match { + case "title" => AbstractPaceFunctions.cleanup(value) + case _ => value + } + +// if (!res.equals(AbstractPaceFunctions.normalize(value))) { +// println(res) +// println(AbstractPaceFunctions.normalize(value)) +// println() +// } + + res + } + } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java 
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java index 5c6939e60..edad0ae2e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java @@ -23,7 +23,6 @@ public class AuthorsMatch extends AbstractListComparator { private String MODE; // full or surname private int SIZE_THRESHOLD; private String TYPE; // count or percentage - private int common; public AuthorsMatch(Map params) { super(params, new com.wcohen.ss.JaroWinkler()); @@ -35,7 +34,6 @@ public class AuthorsMatch extends AbstractListComparator { FULLNAME_THRESHOLD = Double.parseDouble(params.getOrDefault("fullname_th", "0.9")); SIZE_THRESHOLD = Integer.parseInt(params.getOrDefault("size_th", "20")); TYPE = params.getOrDefault("type", "percentage"); - common = 0; } protected AuthorsMatch(double w, AbstractStringDistance ssalgo) { @@ -44,22 +42,27 @@ public class AuthorsMatch extends AbstractListComparator { @Override public double compare(final List a, final List b, final Config conf) { - if (a.isEmpty() || b.isEmpty()) return -1; if (a.size() > SIZE_THRESHOLD || b.size() > SIZE_THRESHOLD) return 1.0; - List aList = a.stream().map(author -> new Person(author, false)).collect(Collectors.toList()); + int maxMiss = Integer.MAX_VALUE; List bList = b.stream().map(author -> new Person(author, false)).collect(Collectors.toList()); - common = 0; + Double threshold = getDoubleParam("threshold"); + + if (threshold != null && threshold >= 0.0 && threshold <= 1.0 && a.size() == b.size()) { + maxMiss = (int) Math.floor((1 - threshold) * Math.max(a.size(), b.size())); + } + + int common = 0; // compare each element of List1 with each element of List2 - for (Person p1 : aList) + for (int i = 0; i < a.size(); i++) { + Person p1 = new Person(a.get(i), false); for (Person p2 : bList) { - // both persons are inaccurate if (!p1.isAccurate() && !p2.isAccurate()) { // compare just normalized fullnames @@ -118,11 +121,15 
@@ public class AuthorsMatch extends AbstractListComparator { } } - } + if (i - common > maxMiss) { + return 0.0; + } + } + // normalization factor to compute the score - int normFactor = aList.size() == bList.size() ? aList.size() : (aList.size() + bList.size() - common); + int normFactor = a.size() == b.size() ? a.size() : (a.size() + b.size() - common); if (TYPE.equals("percentage")) { return (double) common / normFactor; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java index 238cb16ce..34ebcf7a7 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java @@ -25,6 +25,7 @@ public class InstanceTypeMatch extends AbstractListComparator { translationMap.put("Conference object", "*"); translationMap.put("Other literature type", "*"); translationMap.put("Unknown", "*"); + translationMap.put("UNKNOWN", "*"); // article types translationMap.put("Article", "Article"); @@ -76,5 +77,4 @@ public class InstanceTypeMatch extends AbstractListComparator { protected double normalize(final double d) { return d; } - } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java index 877cb95ab..e2ee062b5 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java @@ -3,6 +3,7 @@ package eu.dnetlib.pace.tree; import java.util.Map; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,16 +31,25 @@ public class LevensteinTitle extends AbstractStringComparator { } @Override - public double distance(final String a, final String b, final Config conf) { - final String ca = cleanup(a); - final String cb = 
cleanup(b); - + public double distance(final String ca, final String cb, final Config conf) { final boolean check = checkNumbers(ca, cb); if (check) return 0.5; - return normalize(ssalgo.score(ca, cb), ca.length(), cb.length()); + Double threshold = getDoubleParam("threshold"); + + // reduce Levenshtein algo complexity when target threshold is known + if (threshold != null && threshold >= 0.0 && threshold <= 1.0) { + int maxdistance = (int) Math.floor((1 - threshold) * Math.max(ca.length(), cb.length())); + int score = StringUtils.getLevenshteinDistance(ca, cb, maxdistance); + if (score == -1) { + return 0; + } + return normalize(score, ca.length(), cb.length()); + } else { + return normalize(StringUtils.getLevenshteinDistance(ca, cb), ca.length(), cb.length()); + } } private double normalize(final double score, final int la, final int lb) { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java new file mode 100644 index 000000000..8f525c6d5 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java @@ -0,0 +1,29 @@ + +package eu.dnetlib.pace.tree; + +import java.util.Map; + +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.tree.support.AbstractStringComparator; +import eu.dnetlib.pace.tree.support.ComparatorClass; + +@ComparatorClass("maxLengthMatch") +public class MaxLengthMatch extends AbstractStringComparator { + + private final int limit; + + public MaxLengthMatch(Map params) { + super(params); + + limit = Integer.parseInt(params.getOrDefault("limit", "200")); + } + + @Override + public double compare(String a, String b, final Config conf) { + return a.length() < limit && b.length() < limit ? 
1.0 : -1.0; + } + + protected String toString(final Object object) { + return toFirstString(object); + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java index 8a957c5e3..cde73fd2b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java @@ -127,4 +127,14 @@ public abstract class AbstractComparator extends AbstractPaceFunctions implem return this.weight; } + public Double getDoubleParam(String name) { + String svalue = params.get(name); + + try { + return Double.parseDouble(svalue); + } catch (Throwable t) { + } + + return null; + } } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java index c2b0ddda7..177ad73df 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java @@ -67,8 +67,10 @@ public class BlockProcessor { private void processRows(final List queue, final Reporter context) { - for (int pivotPos = 0; pivotPos < queue.size(); pivotPos++) { - final Row pivot = queue.get(pivotPos); + IncrementalConnectedComponents icc = new IncrementalConnectedComponents(queue.size()); + + for (int i = 0; i < queue.size(); i++) { + final Row pivot = queue.get(i); final String idPivot = pivot.getString(identifierFieldPos); // identifier final Object fieldsPivot = getJavaValue(pivot, orderFieldPos); @@ -76,9 +78,9 @@ public class BlockProcessor { final WfConfig wf = dedupConf.getWf(); if (fieldPivot != null) { - int i = 0; - for (int windowPos = pivotPos + 1; windowPos < queue.size(); windowPos++) { - final Row curr = queue.get(windowPos); + for (int j = icc.nextUnconnected(i, i + 1); j >= 0 + && j < queue.size(); j = 
icc.nextUnconnected(i, j + 1)) { + final Row curr = queue.get(j); final String idCurr = curr.getString(identifierFieldPos); // identifier if (mustSkip(idCurr)) { @@ -86,7 +88,7 @@ public class BlockProcessor { break; } - if (++i > wf.getSlidingWindowSize()) { + if (wf.getSlidingWindowSize() > 0 && (j - i) > wf.getSlidingWindowSize()) { break; } @@ -97,7 +99,9 @@ public class BlockProcessor { final TreeProcessor treeProcessor = new TreeProcessor(dedupConf); - emitOutput(treeProcessor.compare(pivot, curr), idPivot, idCurr, context); + if (emitOutput(treeProcessor.compare(pivot, curr), idPivot, idCurr, context)) { + icc.connect(i, j); + } } } } @@ -115,7 +119,8 @@ public class BlockProcessor { return null; } - private void emitOutput(final boolean result, final String idPivot, final String idCurr, final Reporter context) { + private boolean emitOutput(final boolean result, final String idPivot, final String idCurr, + final Reporter context) { if (result) { if (idPivot.compareTo(idCurr) <= 0) { @@ -127,6 +132,8 @@ public class BlockProcessor { } else { context.incrementCounter(dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold(), 1); } + + return result; } private boolean mustSkip(final String idPivot) { @@ -142,5 +149,4 @@ public class BlockProcessor { context.emit(type, from, to); } - } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java new file mode 100644 index 000000000..ed35239a8 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.pace.util; + +import java.util.BitSet; + +public class IncrementalConnectedComponents { + final private int size; + + final private BitSet[] indexes; + + IncrementalConnectedComponents(int size) { + this.size = size; + this.indexes = new BitSet[size]; + } + + public void connect(int i, int 
j) { + if (indexes[i] == null) { + if (indexes[j] == null) { + indexes[i] = new BitSet(size); + } else { + indexes[i] = indexes[j]; + } + } else { + if (indexes[j] != null && indexes[i] != indexes[j]) { + // merge adjacency lists for i and j + indexes[i].or(indexes[j]); + } + } + + indexes[i].set(i); + indexes[i].set(j); + indexes[j] = indexes[i]; + } + + public int nextUnconnected(int i, int j) { + if (indexes[i] == null) { + return j; + } + int result = indexes[i].nextClearBit(j); + + return (result >= size) ? -1 : result; + } + + public BitSet getConnections(int i) { + if (indexes[i] == null) { + return null; + } + return indexes[i]; + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java index 28244cb3b..7dc340663 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java @@ -97,6 +97,8 @@ public class MapDocumentUtil { Object o = json.read(jsonPath); if (o instanceof String) return (String) o; + if (o instanceof Number) + return (String) o.toString(); if (o instanceof JSONArray && ((JSONArray) o).size() > 0) return (String) ((JSONArray) o).get(0); return ""; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java index 252205c79..746892f0c 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java @@ -40,7 +40,7 @@ public class PaceResolver implements Serializable { Collectors.toMap(cl -> cl.getAnnotation(ComparatorClass.class).value(), cl -> (Class) cl)); } - public ClusteringFunction getClusteringFunction(String name, Map params) throws PaceException { + public ClusteringFunction getClusteringFunction(String name, Map params) throws PaceException { try { return 
clusteringFunctions.get(name).getDeclaredConstructor(Map.class).newInstance(params); } catch (InstantiationException | IllegalAccessException | InvocationTargetException diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java index f9a1ea9e2..80e349a3f 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.DedupConfig; public class ClusteringFunctionTest extends AbstractPaceTest { - private static Map params; + private static Map params; private static DedupConfig conf; @BeforeAll @@ -40,10 +40,10 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testNgram() { - params.put("ngramLen", 3); - params.put("max", 8); - params.put("maxPerToken", 2); - params.put("minNgramLen", 1); + params.put("ngramLen", "3"); + params.put("max", "8"); + params.put("maxPerToken", "2"); + params.put("minNgramLen", "1"); final ClusteringFunction ngram = new Ngrams(params); @@ -54,8 +54,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testNgramPairs() { - params.put("ngramLen", 3); - params.put("max", 2); + params.put("ngramLen", "3"); + params.put("max", "2"); final ClusteringFunction np = new NgramPairs(params); @@ -66,8 +66,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testSortedNgramPairs() { - params.put("ngramLen", 3); - params.put("max", 2); + params.put("ngramLen", "3"); + params.put("max", "2"); final ClusteringFunction np = new SortedNgramPairs(params); @@ -87,9 +87,9 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testAcronym() { - params.put("max", 4); - params.put("minLen", 1); - params.put("maxLen", 3); + params.put("max", "4"); + 
params.put("minLen", "1"); + params.put("maxLen", "3"); final ClusteringFunction acro = new Acronyms(params); @@ -100,8 +100,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testSuffixPrefix() { - params.put("len", 3); - params.put("max", 4); + params.put("len", "3"); + params.put("max", "4"); final ClusteringFunction sp = new SuffixPrefix(params); @@ -109,8 +109,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { System.out.println(s); System.out.println(sp.apply(conf, Lists.newArrayList(s))); - params.put("len", 3); - params.put("max", 1); + params.put("len", "3"); + params.put("max", "1"); System.out.println(sp.apply(conf, Lists.newArrayList("Framework for general-purpose deduplication"))); } @@ -118,8 +118,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testWordsSuffixPrefix() { - params.put("len", 3); - params.put("max", 4); + params.put("len", "3"); + params.put("max", "4"); final ClusteringFunction sp = new WordsSuffixPrefix(params); @@ -130,7 +130,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testWordsStatsSuffixPrefix() { - params.put("mod", 10); + params.put("mod", "10"); final ClusteringFunction sp = new WordsStatsSuffixPrefixChain(params); @@ -167,7 +167,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testFieldValue() { - params.put("randomLength", 5); + params.put("randomLength", "5"); final ClusteringFunction sp = new SpaceTrimmingFieldValue(params); diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java new file mode 100644 index 000000000..b0f105d7c --- /dev/null +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.pace.util; + +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import org.junit.jupiter.api.Test; + +public class IncrementalConnectedComponentsTest { + + @Test + public void transitiveClosureTest() { + IncrementalConnectedComponents icc = new IncrementalConnectedComponents(10); + + icc.connect(0, 1); + icc.connect(0, 2); + icc.connect(0, 3); + + icc.connect(1, 2); + icc.connect(1, 4); + icc.connect(1, 5); + + icc.connect(6, 7); + icc.connect(6, 9); + + assertEquals(icc.getConnections(0).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(1).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(2).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(3).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(4).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(5).toString(), "{0, 1, 2, 3, 4, 5}"); + + assertEquals(icc.getConnections(6).toString(), "{6, 7, 9}"); + assertEquals(icc.getConnections(7).toString(), "{6, 7, 9}"); + assertEquals(icc.getConnections(9).toString(), "{6, 7, 9}"); + + assertNull(icc.getConnections(8)); + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 68af3d699..0af7bb6d0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -101,6 +101,10 @@ abstract class AbstractSparkAction implements Serializable { return SparkSession.builder().config(conf).getOrCreate(); } + protected static SparkSession getSparkWithHiveSession(SparkConf conf) { + return SparkSession.builder().enableHiveSupport().config(conf).getOrCreate(); + } + protected static void save(Dataset dataset, String outPath, SaveMode mode) { 
dataset.write().option("compression", "gzip").mode(mode).json(outPath); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 60669106a..d9fb24078 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -2,20 +2,19 @@ package eu.dnetlib.dhp.oa.dedup; import java.lang.reflect.InvocationTargetException; -import java.util.*; -import java.util.stream.Collectors; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; -import org.apache.commons.beanutils.BeanUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; @@ -41,88 +40,91 @@ public class DedupRecordFactory { long ts = System.currentTimeMillis(); // - Dataset> entities = spark + Dataset entities = spark .read() - .textFile(entitiesInputPath) + .schema(Encoders.bean(clazz).schema()) + .json(entitiesInputPath) + .as(Encoders.bean(clazz)) .map( - (MapFunction>) it -> { - T entity = OBJECT_MAPPER.readValue(it, clazz); + (MapFunction>) entity -> { return new Tuple2<>(entity.getId(), entity); }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))); + Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) + 
.selectExpr("_1 AS id", "_2 AS kryoObject"); // : source is the dedup_id, target is the id of the mergedIn - Dataset> mergeRels = spark + Dataset mergeRels = spark .read() .load(mergeRelsInputPath) - .as(Encoders.bean(Relation.class)) .where("relClass == 'merges'") - .map( - (MapFunction>) r -> new Tuple2<>(r.getSource(), r.getTarget()), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + .selectExpr("source as dedupId", "target as id"); return mergeRels - .joinWith(entities, mergeRels.col("_2").equalTo(entities.col("_1")), "inner") + .join(entities, "id") + .select("dedupId", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) + .groupByKey((MapFunction, String>) Tuple2::_1, Encoders.STRING()) + .reduceGroups( + (ReduceFunction>) (t1, t2) -> new Tuple2<>(t1._1(), + reduceEntity(t1._1(), t1._2(), t2._2(), clazz))) .map( - (MapFunction, Tuple2>, Tuple2>) value -> new Tuple2<>( - value._1()._1(), value._2()._2()), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - .groupByKey( - (MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, T>) (key, - values) -> entityMerger(key, values, ts, dataInfo, clazz), + (MapFunction>, T>) t -> { + T res = t._2()._2(); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + }, Encoders.bean(clazz)); } + public static T reduceEntity( + String id, T entity, T duplicate, Class clazz) { + + int compare = new IdentifierComparator() + .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); + + if (compare > 0) { + T swap = duplicate; + duplicate = entity; + entity = swap; + } + + entity.mergeFrom(duplicate); + entity.setId(id); + + if (ModelSupport.isSubClass(duplicate, Result.class)) { + Result re = (Result) entity; + Result rd = (Result) duplicate; + + List> authors = new ArrayList<>(); + if (re.getAuthor() != null) { + authors.add(re.getAuthor()); + } + if (rd.getAuthor() != null) { + authors.add(rd.getAuthor()); + } 
+ + re.setAuthor(AuthorMerger.merge(authors)); + } + + return entity; + } + public static T entityMerger( String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) throws IllegalAccessException, InstantiationException, InvocationTargetException { + T base = entities.next()._2(); - final Comparator> idComparator = new IdentifierComparator<>(); - - final LinkedList entityList = Lists - .newArrayList(entities) - .stream() - .map(t -> Identifier.newInstance(t._2())) - .sorted(idComparator) - .map(Identifier::getEntity) - .collect(Collectors.toCollection(LinkedList::new)); - - final T entity = clazz.newInstance(); - final T first = entityList.removeFirst(); - - BeanUtils.copyProperties(entity, first); - - final List> authors = Lists.newArrayList(); - - entityList - .forEach( - duplicate -> { - entity.mergeFrom(duplicate); - if (ModelSupport.isSubClass(duplicate, Result.class)) { - Result r1 = (Result) duplicate; - Optional - .ofNullable(r1.getAuthor()) - .ifPresent(a -> authors.add(a)); - } - }); - - // set authors and date - if (ModelSupport.isSubClass(entity, Result.class)) { - Optional - .ofNullable(((Result) entity).getAuthor()) - .ifPresent(a -> authors.add(a)); - - ((Result) entity).setAuthor(AuthorMerger.merge(authors)); + while (entities.hasNext()) { + T duplicate = entities.next()._2(); + if (duplicate != null) + base = reduceEntity(id, base, duplicate, clazz); } - entity.setId(id); + base.setDataInfo(dataInfo); + base.setLastupdatetimestamp(ts); - entity.setLastupdatetimestamp(ts); - entity.setDataInfo(dataInfo); - - return entity; + return base; } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index 7e0d66062..37e1bfd15 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.oa.dedup; +import static eu.dnetlib.dhp.utils.DHPUtils.md5; import static org.apache.commons.lang3.StringUtils.substringAfter; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -14,33 +15,36 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) - public static String generate(List> pids, String defaultID) { + public static String generate(List pids, String defaultID) { if (pids == null || pids.isEmpty()) return defaultID; return generateId(pids); } - private static String generateId(List> pids) { - Identifier bp = pids + private static String generateId(List pids) { + Identifier bp = pids .stream() .min(Identifier::compareTo) .orElseThrow(() -> new IllegalStateException("unable to generate id")); - String prefix = substringBefore(bp.getOriginalID(), "|"); - String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); - String suffix = substringAfter(bp.getOriginalID(), "::"); + return generate(bp.getOriginalID()); + } + + public static String generate(String originalId) { + String prefix = substringBefore(originalId, "|"); + String ns = substringBefore(substringAfter(originalId, "|"), "::"); + String suffix = substringAfter(originalId, "::"); final String pidType = substringBefore(ns, "_"); if (PidType.isValid(pidType)) { return prefix + "|" + dedupify(ns) + "::" + suffix; } else { - return prefix + "|dedup_wf_001::" + suffix; + return prefix + "|dedup_wf_001::" + md5(originalId); // hash the whole originalId to avoid collisions } } private static String dedupify(String ns) { - StringBuilder prefix; if (PidType.valueOf(substringBefore(ns, "_")) == PidType.openorgs) { prefix = new StringBuilder(substringBefore(ns, "_")); @@ -53,5 +57,4 @@ public class IdGenerator implements Serializable { } 
return prefix.substring(0, 12); } - } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index babbaaabd..5bb132b89 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -1,51 +1,47 @@ package eu.dnetlib.dhp.oa.dedup; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; - -import java.io.IOException; -import java.util.*; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; -import org.apache.spark.graphx.Edge; -import org.apache.spark.rdd.RDD; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.dom4j.DocumentException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; - -import com.google.common.collect.Lists; import com.google.common.hash.Hashing; - +import com.kwartile.lib.cc.ConnectedComponent; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.dedup.graph.ConnectedComponent; -import eu.dnetlib.dhp.oa.dedup.graph.GraphProcessor; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import 
eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.*; +import org.apache.spark.sql.catalyst.encoders.RowEncoder; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.expressions.Window; +import org.apache.spark.sql.expressions.WindowSpec; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.dom4j.DocumentException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; +import scala.Tuple3; +import scala.collection.JavaConversions; + +import java.io.IOException; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; + +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; +import static org.apache.spark.sql.functions.*; public class SparkCreateMergeRels extends AbstractSparkAction { @@ -68,10 +64,12 @@ public class SparkCreateMergeRels extends AbstractSparkAction { log.info("isLookupUrl {}", isLookUpUrl); SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", 
parser.get("hiveMetastoreUris")); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - new SparkCreateMergeRels(parser, getSparkSession(conf)) + new SparkCreateMergeRels(parser, getSparkWithHiveSession(conf)) .run(ISLookupClientFactory.getLookUpService(isLookUpUrl)); } @@ -87,14 +85,15 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .ofNullable(parser.get("cutConnectedComponent")) .map(Integer::valueOf) .orElse(0); + + final String pivotHistoryDatabase = parser.get("pivotHistoryDatabase"); + log.info("connected component cut: '{}'", cut); log.info("graphBasePath: '{}'", graphBasePath); log.info("isLookUpUrl: '{}'", isLookUpUrl); log.info("actionSetId: '{}'", actionSetId); log.info("workingPath: '{}'", workingPath); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { final String subEntity = dedupConf.getWf().getSubEntityValue(); final Class clazz = ModelSupport.entityTypes.get(EntityType.valueOf(subEntity)); @@ -106,113 +105,170 @@ public class SparkCreateMergeRels extends AbstractSparkAction { final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, subEntity); - // - JavaPairRDD vertexes = createVertexes(sc, graphBasePath, subEntity, dedupConf); - - final RDD> edgeRdd = spark + final Dataset simRels = spark .read() .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) - .as(Encoders.bean(Relation.class)) - .javaRDD() - .map(it -> new Edge<>(hash(it.getSource()), hash(it.getTarget()), it.getRelClass())) - .rdd(); + .select("source", "target"); - Dataset> rawMergeRels = spark - .createDataset( - GraphProcessor - .findCCs(vertexes.rdd(), edgeRdd, maxIterations, cut) - .toJavaRDD() - .filter(k -> k.getIds().size() > 1) - .flatMap(this::ccToRels) - .rdd(), - Encoders.tuple(Encoders.STRING(), 
Encoders.STRING())); + UserDefinedFunction hashUDF = functions + .udf( + (String s) -> hash(s), DataTypes.LongType); - Dataset> entities = spark + // + Dataset vertexIdMap = simRels + .selectExpr("source as id") + .union(simRels.selectExpr("target as id")) + .distinct() + .withColumn("vertexId", hashUDF.apply(functions.col("id"))); + + final Dataset edges = spark .read() - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .map( - (MapFunction>) it -> { - OafEntity entity = OBJECT_MAPPER.readValue(it, clazz); - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))); + .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) + .select("source", "target") + .withColumn("source", hashUDF.apply(functions.col("source"))) + .withColumn("target", hashUDF.apply(functions.col("target"))); - Dataset mergeRels = rawMergeRels - .joinWith(entities, rawMergeRels.col("_2").equalTo(entities.col("_1")), "inner") - // , - .map( - (MapFunction, Tuple2>, Tuple2>) value -> new Tuple2<>( - value._1()._1(), value._2()._2()), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - // - .groupByKey( - (MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, ConnectedComponent>) this::generateID, - Encoders.bean(ConnectedComponent.class)) - // + Dataset cliques = ConnectedComponent + .runOnPairs(edges, 50, spark); + + Dataset rawMergeRels = cliques + .join(vertexIdMap, JavaConversions.asScalaBuffer(Collections.singletonList("vertexId")), "inner") + .drop("vertexId") + .distinct(); + + Dataset pivotHistory = spark + .createDataset( + Collections.emptyList(), + RowEncoder + .apply(StructType.fromDDL("id STRING, firstUsage STRING, lastUsage STRING, dedupId STRING"))); + + if (StringUtils.isNotBlank(pivotHistoryDatabase)) { + pivotHistory = spark + .read() + .table(pivotHistoryDatabase + "." 
+ subEntity) + .selectExpr("id", "lastUsage", "dedupId"); + } + + String collectedfromExpr = "false AS collectedfrom"; + String dateExpr = "'' AS date"; + + if (Result.class.isAssignableFrom(clazz)) { + if (Publication.class.isAssignableFrom(clazz)) { + collectedfromExpr = "array_contains(collectedfrom.key, '" + ModelConstants.CROSSREF_ID + + "') AS collectedfrom"; + } else if (eu.dnetlib.dhp.schema.oaf.Dataset.class.isAssignableFrom(clazz)) { + collectedfromExpr = "array_contains(collectedfrom.key, '" + ModelConstants.DATACITE_ID + + "') AS collectedfrom"; + } + + dateExpr = "dateofacceptance.value AS date"; + } + + UserDefinedFunction mapPid = udf( + (String s) -> Math.min(PidType.tryValueOf(s).ordinal(), PidType.w3id.ordinal()), DataTypes.IntegerType); + UserDefinedFunction validDate = udf((String date) -> { + if (StringUtils.isNotBlank(date) + && date.matches(DatePicker.DATE_PATTERN) && DatePicker.inRange(date)) { + return date; + } + return LocalDate.now().plusWeeks(1).toString(); + }, DataTypes.StringType); + + Dataset pivotingData = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .selectExpr( + "id", + "regexp_extract(id, '^\\\\d+\\\\|([^_]+).*::', 1) AS pidType", + collectedfromExpr, + dateExpr) + .withColumn("pidType", mapPid.apply(col("pidType"))) // ordinal of pid type + .withColumn("date", validDate.apply(col("date"))); + + UserDefinedFunction generateDedupId = udf((String s) -> IdGenerator.generate(s), DataTypes.StringType); + + // ordering to selected pivot id + WindowSpec w = Window + .partitionBy("groupId") + .orderBy( + col("lastUsage").desc_nulls_last(), + col("pidType").asc_nulls_last(), + col("collectedfrom").desc_nulls_last(), + col("date").asc_nulls_last(), + col("id").asc_nulls_last()); + + Dataset output = rawMergeRels + .join(pivotHistory, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "full") + .join(pivotingData, 
JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .withColumn("pivot", functions.first("id").over(w)) + .withColumn("pivotDedupId", functions.first("dedupId").over(w)) + .withColumn("position", functions.row_number().over(w)) + .filter(cut > 0 ? col("position").lt(lit(cut)) : lit(true)) + // .select("id", "groupId", "collectedfrom", "pivot", "dedupId", "pivotDedupId") + // .distinct() .flatMap( - (FlatMapFunction) cc -> ccToMergeRel(cc, dedupConf), - Encoders.bean(Relation.class)); + (FlatMapFunction>) (Row r) -> { + String id = r.getAs("id"); + String pivot = r.getAs("pivot"); + String pivotDedupId = r.getAs("pivotDedupId"); // dedupId associated with the pivot + String dedupId = r.getAs("dedupId"); // dedupId associated with this id if it was a pivot - saveParquet(mergeRels, mergeRelPath, SaveMode.Overwrite); + // filter out id == pivotDedupId + // those are caused by claim expressed on pivotDedupId + // information will be merged after creating deduprecord + if (id.equals(pivotDedupId)) { + return Collections.emptyIterator(); + } + ArrayList> res = new ArrayList<>(); + + // singleton pivots have null groupId as they do not match rawMergeRels + if (r.isNullAt(r.fieldIndex("groupId"))) { + // the record is existing if it matches pivotingData + if (!r.isNullAt(r.fieldIndex("collectedfrom"))) { + // create relation with old dedup id + res.add(new Tuple3<>(id, dedupId, null)); + } + return res.iterator(); + } + + // new pivot, assign pivotDedupId with current IdGenerator + if (StringUtils.isBlank(pivotDedupId)) { + pivotDedupId = IdGenerator.generate(pivot); + } + + // this was a pivot in a preceding graph but it has been merged into a new group with different + // pivot + if (StringUtils.isNotBlank(dedupId) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { + // materialize the previous dedup record as a merge relation with the new one + res.add(new Tuple3<>(dedupId, pivotDedupId, null)); + } + + // add merge relations + res.add(new 
Tuple3<>(id, pivotDedupId, pivot)); + + return res.iterator(); + }, Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.STRING())) + .distinct() + .flatMap( + (FlatMapFunction, Relation>) (Tuple3 r) -> { + String id = r._1(); + String dedupId = r._2(); + String pivot = r._3(); + + ArrayList res = new ArrayList<>(); + res.add(rel(pivot, dedupId, id, ModelConstants.MERGES, dedupConf)); + res.add(rel(pivot, id, dedupId, ModelConstants.IS_MERGED_IN, dedupConf)); + + return res.iterator(); + }, Encoders.bean(Relation.class)); + + saveParquet(output, mergeRelPath, SaveMode.Overwrite); } } - private ConnectedComponent generateID(String key, Iterator> values) { - - List> identifiers = Lists - .newArrayList(values) - .stream() - .map(v -> Identifier.newInstance(v._2())) - .collect(Collectors.toList()); - - String rootID = IdGenerator.generate(identifiers, key); - - if (Objects.equals(rootID, key)) - throw new IllegalStateException("generated default ID: " + rootID); - - return new ConnectedComponent(rootID, - identifiers.stream().map(i -> i.getEntity().getId()).collect(Collectors.toSet())); - } - - private JavaPairRDD createVertexes(JavaSparkContext sc, String graphBasePath, String subEntity, - DedupConfig dedupConf) { - - return sc - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .mapToPair(json -> { - String id = MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), json); - return new Tuple2<>(hash(id), id); - }); - } - - private Iterator> ccToRels(ConnectedComponent cc) { - return cc - .getIds() - .stream() - .map(id -> new Tuple2<>(cc.getCcId(), id)) - .iterator(); - } - - private Iterator ccToMergeRel(ConnectedComponent cc, DedupConfig dedupConf) { - return cc - .getIds() - .stream() - .flatMap( - id -> { - List tmp = new ArrayList<>(); - - tmp.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf)); - tmp.add(rel(id, cc.getCcId(), ModelConstants.IS_MERGED_IN, dedupConf)); - - return tmp.stream(); - }) - .iterator(); - } - 
- private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) { + private static Relation rel(String pivot, String source, String target, String relClass, DedupConfig dedupConf) { String entityType = dedupConf.getWf().getEntityType(); @@ -238,6 +294,14 @@ public class SparkCreateMergeRels extends AbstractSparkAction { // TODO calculate the trust value based on the similarity score of the elements in the CC r.setDataInfo(info); + + if (pivot != null) { + KeyValue pivotKV = new KeyValue(); + pivotKV.setKey("pivot"); + pivotKV.setValue(pivot); + + r.setProperties(Arrays.asList(pivotKV)); + } return r; } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index 65ad0c327..60752a457 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -91,18 +91,12 @@ public class SparkWhitelistSimRels extends AbstractSparkAction { Dataset entities = spark .read() .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .repartition(numPartitions) - .withColumn("id", functions.get_json_object(new Column("value"), dedupConf.getWf().getIdPath())); + .select(functions.get_json_object(new Column("value"), dedupConf.getWf().getIdPath()).as("id")) + .distinct(); - Dataset whiteListRels1 = whiteListRels - .join(entities, entities.col("id").equalTo(whiteListRels.col("from")), "inner") - .select("from", "to"); - - Dataset whiteListRels2 = whiteListRels1 - .join(entities, whiteListRels1.col("to").equalTo(entities.col("id")), "inner") - .select("from", "to"); - - Dataset whiteListSimRels = whiteListRels2 + Dataset whiteListSimRels = whiteListRels + .join(entities, entities.col("id").equalTo(whiteListRels.col("from")), "leftsemi") + 
.join(entities, functions.col("to").equalTo(entities.col("id")), "leftsemi") .map( (MapFunction) r -> DedupUtility .createSimRel(r.getString(0), r.getString(1), entity), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java deleted file mode 100644 index 4a39a175d..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ /dev/null @@ -1,100 +0,0 @@ - -package eu.dnetlib.dhp.oa.dedup.graph; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.commons.lang3.StringUtils; -import org.codehaus.jackson.annotate.JsonIgnore; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.util.PaceException; - -public class ConnectedComponent implements Serializable { - - private String ccId; - private Set ids; - - private static final String CONNECTED_COMPONENT_ID_PREFIX = "connect_comp"; - - public ConnectedComponent(Set ids, final int cut) { - this.ids = ids; - - this.ccId = createDefaultID(); - - if (cut > 0 && ids.size() > cut) { - this.ids = ids - .stream() - .filter(id -> !ccId.equalsIgnoreCase(id)) - .limit(cut - 1) - .collect(Collectors.toSet()); -// this.ids.add(ccId); ?? 
- } - } - - public ConnectedComponent(String ccId, Set ids) { - this.ccId = ccId; - this.ids = ids; - } - - public String createDefaultID() { - if (ids.size() > 1) { - final String s = getMin(); - String prefix = s.split("\\|")[0]; - ccId = prefix + "|" + CONNECTED_COMPONENT_ID_PREFIX + "::" + DHPUtils.md5(s); - return ccId; - } else { - return ids.iterator().next(); - } - } - - @JsonIgnore - public String getMin() { - - final StringBuilder min = new StringBuilder(); - - ids - .forEach( - id -> { - if (StringUtils.isBlank(min.toString())) { - min.append(id); - } else { - if (min.toString().compareTo(id) > 0) { - min.setLength(0); - min.append(id); - } - } - }); - return min.toString(); - } - - @Override - public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (IOException e) { - throw new PaceException("Failed to create Json: ", e); - } - } - - public Set getIds() { - return ids; - } - - public void setIds(Set ids) { - this.ids = ids; - } - - public String getCcId() { - return ccId; - } - - public void setCcId(String ccId) { - this.ccId = ccId; - } -} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala deleted file mode 100644 index f4dd85d75..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala +++ /dev/null @@ -1,37 +0,0 @@ -package eu.dnetlib.dhp.oa.dedup.graph - -import org.apache.spark.graphx._ -import org.apache.spark.rdd.RDD - -import scala.collection.JavaConversions; - -object GraphProcessor { - - def findCCs(vertexes: RDD[(VertexId, String)], edges: RDD[Edge[String]], maxIterations: Int, cut:Int): RDD[ConnectedComponent] = { - val graph: Graph[String, String] = Graph(vertexes, edges).partitionBy(PartitionStrategy.RandomVertexCut) //TODO remember to remove 
partitionby - val cc = graph.connectedComponents(maxIterations).vertices - - val joinResult = vertexes.leftOuterJoin(cc).map { - case (id, (openaireId, cc)) => { - if (cc.isEmpty) { - (id, openaireId) - } - else { - (cc.get, openaireId) - } - } - } - val connectedComponents = joinResult.groupByKey() - .map[ConnectedComponent](cc => asConnectedComponent(cc, cut)) - connectedComponents - } - - - - def asConnectedComponent(group: (VertexId, Iterable[String]), cut:Int): ConnectedComponent = { - val docs = group._2.toSet[String] - val connectedComponent = new ConnectedComponent(JavaConversions.setAsJavaSet[String](docs), cut); - connectedComponent - } - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index 0cba4fc3b..e03c3bf95 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -3,21 +3,21 @@ package eu.dnetlib.dhp.oa.dedup.model; import java.io.Serializable; import java.text.SimpleDateFormat; -import java.util.*; -import java.util.stream.Collectors; +import java.time.LocalDate; +import java.util.Date; +import java.util.List; +import java.util.Objects; import org.apache.commons.lang3.StringUtils; -import com.google.common.collect.Sets; - import eu.dnetlib.dhp.oa.dedup.DatePicker; import eu.dnetlib.dhp.oa.dedup.IdentifierComparator; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; -import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import 
eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class Identifier implements Serializable, Comparable> { @@ -50,7 +50,7 @@ public class Identifier implements Serializable, Comparable if (Objects.nonNull(date)) { return date; } else { - String sDate = BASE_DATE; + String sDate = LocalDate.now().plusDays(1).toString(); if (ModelSupport.isSubClass(getEntity(), Result.class)) { Result result = (Result) getEntity(); if (isWellformed(result.getDateofacceptance())) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json index b1df08535..4f9f4b0b5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json @@ -28,5 +28,17 @@ "paramLongName": "workingPath", "paramDescription": "path for the working directory", "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "pivotHistoryDatabase", + "paramDescription": "Pivot history database", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml index 2e0ed9aee..cd29965e3 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml @@ -15,4 +15,8 @@ 
oozie.action.sharelib.for.spark spark2 + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml index ba2270c8a..49a331def 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml @@ -188,6 +188,8 @@ --isLookUpUrl${isLookUpUrl} --actionSetId${actionSetId} --cutConnectedComponent${cutConnectedComponent} + --hiveMetastoreUris${hiveMetastoreUris} + --pivotHistoryDatabase${pivotHistoryDatabase} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala b/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala new file mode 100644 index 000000000..4c3362235 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala @@ -0,0 +1,335 @@ +/** Copyright (c) 2017 Kwartile, Inc., http://www.kwartile.com + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** Map-reduce implementation of Connected Component + * Given lists of subgraphs, returns all the nodes that are connected. + */ + +package com.kwartile.lib.cc + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Dataset, Row, SparkSession} +import org.apache.spark.storage.StorageLevel + +import scala.annotation.tailrec +import scala.collection.mutable + +object ConnectedComponent extends Serializable { + + /** Applies Small Star operation on RDD of nodePairs + * + * @param nodePairs on which to apply Small Star operations + * @return new nodePairs after the operation and connectivity change count + */ + private def smallStar(nodePairs: RDD[(Long, Long)]): (RDD[(Long, Long)], Long) = { + + /** generate RDD of (self, List(neighbors)) where self > neighbors + * E.g.: nodePairs (1, 4), (6, 1), (3, 2), (6, 5) + * will result into (4, List(1)), (6, List(1)), (3, List(2)), (6, List(5)) + */ + val neighbors = nodePairs.map(x => { + val (self, neighbor) = (x._1, x._2) + if (self > neighbor) + (self, neighbor) + else + (neighbor, self) + }) + + /** reduce on self to get list of all its neighbors. + * E.g: (4, List(1)), (6, List(1)), (3, List(2)), (6, List(5)) + * will result into (4, List(1)), (6, List(1, 5)), (3, List(2)) + * Note: + * (1) you may need to tweak number of partitions. + * (2) also, watch out for data skew.
In that case, consider using rangePartitioner + */ + val empty = mutable.HashSet[Long]() + val allNeighbors = neighbors.aggregateByKey(empty)( + (lb, v) => lb += v, + (lb1, lb2) => lb1 ++ lb2 + ) + + /** Apply Small Star operation on (self, List(neighbor)) to get newNodePairs and count the change in connectivity + */ + + val newNodePairsWithChangeCount = allNeighbors + .map(x => { + val self = x._1 + val neighbors = x._2.toList + val minNode = argMin(self :: neighbors) + val newNodePairs = (self :: neighbors) + .map(neighbor => { + (neighbor, minNode) + }) + .filter(x => { + val neighbor = x._1 + val minNode = x._2 + (neighbor <= self && neighbor != minNode) || (self == neighbor) + }) + val uniqueNewNodePairs = newNodePairs.toSet.toList + + /** We count the change by taking a diff of the new node pairs with the old node pairs + */ + val connectivityChangeCount = (uniqueNewNodePairs diff neighbors.map((self, _))).length + (uniqueNewNodePairs, connectivityChangeCount) + }) + .persist(StorageLevel.MEMORY_AND_DISK_SER) + + /** Sum all the changeCounts + */ + val totalConnectivityCountChange = newNodePairsWithChangeCount + .mapPartitions(iter => { + val (v, l) = iter.toSeq.unzip + val sum = l.sum + Iterator(sum) + }) + .sum + .toLong + + val newNodePairs = newNodePairsWithChangeCount.map(x => x._1).flatMap(x => x) + newNodePairsWithChangeCount.unpersist(false) + (newNodePairs, totalConnectivityCountChange) + } + + /** Apply Large Star operation on a RDD of nodePairs + * + * @param nodePairs on which to apply Large Star operations + * @return new nodePairs after the operation and connectivity change count + */ + private def largeStar(nodePairs: RDD[(Long, Long)]): (RDD[(Long, Long)], Long) = { + + /** generate RDD of (self, List(neighbors)) + * E.g.: nodePairs (1, 4), (6, 1), (3, 2), (6, 5) + * will result into (4, List(1)), (1, List(4)), (6, List(1)), (1, List(6)), (3, List(2)), (2, List(3)), (6, List(5)), (5, List(6)) + */ + + val neighbors = nodePairs.flatMap(x => { +
val (self, neighbor) = (x._1, x._2) + if (self == neighbor) + List((self, neighbor)) + else + List((self, neighbor), (neighbor, self)) + }) + + /** reduce on self to get list of all its neighbors. + * E.g: (4, List(1)), (1, List(4)), (6, List(1)), (1, List(6)), (3, List(2)), (2, List(3)), (6, List(5)), (5, List(6)) + * will result into (4, List(1)), (1, List(4, 6)), (6, List(1, 5)), (3, List(2)), (2, List(3)), (5, List(6)) + * Note: + * (1) you may need to tweak number of partitions. + * (2) also, watch out for data skew. In that case, consider using rangePartitioner + */ + + val localAdd = (s: mutable.HashSet[Long], v: Long) => s += v + val partitionAdd = (s1: mutable.HashSet[Long], s2: mutable.HashSet[Long]) => s1 ++= s2 + val allNeighbors = + neighbors.aggregateByKey(mutable.HashSet.empty[Long] /*, rangePartitioner*/ )(localAdd, partitionAdd) + + /** Apply Large Star operation on (self, List(neighbor)) to get newNodePairs and count the change in connectivity + */ + + val newNodePairsWithChangeCount = allNeighbors + .map(x => { + val self = x._1 + val neighbors = x._2.toList + val minNode = argMin(self :: neighbors) + val newNodePairs = (self :: neighbors) + .map(neighbor => { + (neighbor, minNode) + }) + .filter(x => { + val neighbor = x._1 + val minNode = x._2 + neighbor > self || neighbor == minNode + }) + + val uniqueNewNodePairs = newNodePairs.toSet.toList + val connectivityChangeCount = (uniqueNewNodePairs diff neighbors.map((self, _))).length + (uniqueNewNodePairs, connectivityChangeCount) + }) + .persist(StorageLevel.MEMORY_AND_DISK_SER) + + val totalConnectivityCountChange = newNodePairsWithChangeCount + .mapPartitions(iter => { + val (v, l) = iter.toSeq.unzip + val sum = l.sum + Iterator(sum) + }) + .sum + .toLong + + /** Sum all the changeCounts + */ + val newNodePairs = newNodePairsWithChangeCount.map(x => x._1).flatMap(x => x) + newNodePairsWithChangeCount.unpersist(false) + (newNodePairs, totalConnectivityCountChange) + } + + private def 
argMin(nodes: List[Long]): Long = { + nodes.min(Ordering.by((node: Long) => node)) + } + + /** Build nodePairs given a list of nodes. A list of nodes represents a subgraph. + * + * @param nodes that are part of a subgraph + * @return nodePairs for a subgraph + */ + private def buildPairs(nodes: List[Long]): List[(Long, Long)] = { + buildPairs(nodes.head, nodes.tail, null.asInstanceOf[List[(Long, Long)]]) + } + + @tailrec + private def buildPairs(node: Long, neighbors: List[Long], partialPairs: List[(Long, Long)]): List[(Long, Long)] = { + if (neighbors.isEmpty) { + if (partialPairs != null) + List((node, node)) ::: partialPairs + else + List((node, node)) + } else if (neighbors.length == 1) { + val neighbor = neighbors(0) + if (node > neighbor) + if (partialPairs != null) List((node, neighbor)) ::: partialPairs else List((node, neighbor)) + else if (partialPairs != null) List((neighbor, node)) ::: partialPairs + else List((neighbor, node)) + } else { + val newPartialPairs = neighbors + .map(neighbor => { + if (node > neighbor) + List((node, neighbor)) + else + List((neighbor, node)) + }) + .flatMap(x => x) + + if (partialPairs != null) + buildPairs(neighbors.head, neighbors.tail, newPartialPairs ::: partialPairs) + else + buildPairs(neighbors.head, neighbors.tail, newPartialPairs) + } + } + + /** Implements alternatingAlgo. 
Converges when the changeCount is either 0 or does not change from the previous iteration + * + * @param nodePairs for a graph + * @param largeStarConnectivityChangeCount change count that resulted from the previous iteration + * @param smallStarConnectivityChangeCount change count that resulted from the previous iteration + * @param didConverge flag to indicate the algorithm converged + * @param currIterationCount counter to capture number of iterations + * @param maxIterationCount maximum number of iterations to try before giving up + * @return RDD of nodePairs, the convergence flag and the number of iterations performed + */ + + @tailrec + private def alternatingAlgo( + nodePairs: RDD[(Long, Long)], + largeStarConnectivityChangeCount: Long, + smallStarConnectivityChangeCount: Long, + didConverge: Boolean, + currIterationCount: Int, + maxIterationCount: Int + ): (RDD[(Long, Long)], Boolean, Long) = { + + val iterationCount = currIterationCount + 1 + if (didConverge) + (nodePairs, true, currIterationCount) + else if (currIterationCount >= maxIterationCount) { + (nodePairs, false, currIterationCount) + } else { + + val (nodePairsLargeStar, currLargeStarConnectivityChangeCount) = largeStar(nodePairs) + val (nodePairsSmallStar, currSmallStarConnectivityChangeCount) = smallStar(nodePairsLargeStar) + + if ( + (currLargeStarConnectivityChangeCount == largeStarConnectivityChangeCount && + currSmallStarConnectivityChangeCount == smallStarConnectivityChangeCount) || + (currSmallStarConnectivityChangeCount == 0 && currLargeStarConnectivityChangeCount == 0) + ) { + alternatingAlgo( + nodePairsSmallStar, + currLargeStarConnectivityChangeCount, + currSmallStarConnectivityChangeCount, + true, + iterationCount, + maxIterationCount + ) + } else { + alternatingAlgo( + nodePairsSmallStar, + currLargeStarConnectivityChangeCount, + currSmallStarConnectivityChangeCount, + false, + iterationCount, + maxIterationCount + ) + } + } + } + + /** Driver function + * + * @param cliques list of nodes representing subgraphs (or cliques) + * @param
maxIterationCount maximum number iterations to try before giving up + * @return Connected Components as nodePairs where second member of the nodePair is the minimum node in the component + */ + def run(cliques: RDD[List[Long]], maxIterationCount: Int): (RDD[(Long, Long)], Boolean, Long) = { + + val nodePairs = cliques + .map(aClique => { + buildPairs(aClique) + }) + .flatMap(x => x) + + val (cc, didConverge, iterCount) = alternatingAlgo(nodePairs, 9999999L, 9999999L, false, 0, maxIterationCount) + + if (didConverge) { + (cc, didConverge, iterCount) + } else { + (null.asInstanceOf[RDD[(Long, Long)]], didConverge, iterCount) + } + } + + def runOnPairs(nodePairs: RDD[(Long, Long)], maxIterationCount: Int): (RDD[(Long, Long)], Boolean, Long) = { + val (cc, didConverge, iterCount) = alternatingAlgo(nodePairs, 9999999L, 9999999L, false, 0, maxIterationCount) + + if (didConverge) { + (cc, didConverge, iterCount) + } else { + (null.asInstanceOf[RDD[(Long, Long)]], didConverge, iterCount) + } + } + + def runOnPairs(nodePairs: Dataset[Row], maxIterationCount: Int)(implicit spark: SparkSession): Dataset[Row] = { + import spark.implicits._ + + val (cc, didConverge, iterCount) = alternatingAlgo( + nodePairs.map(e => (e.getLong(0), e.getLong(1))).rdd, + 9999999L, + 9999999L, + false, + 0, + maxIterationCount + ) + + if (didConverge) { + cc.toDF("vertexId", "groupId") + } else { + null.asInstanceOf[Dataset[Row]] + } + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 6c4935637..bd5a04e62 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -41,9 +41,13 @@ import com.google.common.collect.Sets; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import 
eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.schema.sx.OafUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import scala.Tuple2; @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @@ -97,6 +101,7 @@ public class SparkDedupTest implements Serializable { final SparkConf conf = new SparkConf(); conf.set("spark.sql.shuffle.partitions", "200"); + conf.set("spark.sql.warehouse.dir", testOutputBasePath + "/spark-warehouse"); spark = SparkSession .builder() .appName(SparkDedupTest.class.getSimpleName()) @@ -186,11 +191,11 @@ public class SparkDedupTest implements Serializable { System.out.println("ds_simrel = " + ds_simrel); System.out.println("orp_simrel = " + orp_simrel); - assertEquals(1538, orgs_simrel); - assertEquals(3523, pubs_simrel); - assertEquals(168, sw_simrel); - assertEquals(221, ds_simrel); - assertEquals(3392, orp_simrel); + assertEquals(751, orgs_simrel); + assertEquals(546, pubs_simrel); + assertEquals(113, sw_simrel); + assertEquals(148, ds_simrel); + assertEquals(280, orp_simrel); } @@ -235,10 +240,10 @@ public class SparkDedupTest implements Serializable { .count(); // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(1538, orgs_simrel); - assertEquals(3523, pubs_simrel); - assertEquals(221, ds_simrel); - assertEquals(3392, orp_simrel); + assertEquals(751, orgs_simrel); + assertEquals(546, pubs_simrel); + assertEquals(148, ds_simrel); + assertEquals(280, orp_simrel); // System.out.println("orgs_simrel = " + orgs_simrel); // System.out.println("pubs_simrel = " + pubs_simrel); // System.out.println("ds_simrel = " + ds_simrel); @@ -268,7 +273,7 @@ public class SparkDedupTest implements Serializable { && 
rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])) .count() > 0); - assertEquals(170, sw_simrel.count()); + assertEquals(115, sw_simrel.count()); // System.out.println("sw_simrel = " + sw_simrel.count()); } @@ -292,7 +297,9 @@ public class SparkDedupTest implements Serializable { "-w", testOutputBasePath, "-cc", - "3" + "3", + "-h", + "" }); new SparkCreateMergeRels(parser, spark).run(isLookUpService); @@ -365,6 +372,113 @@ public class SparkDedupTest implements Serializable { .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); } + @Test + @Order(3) + void createMergeRelsWithPivotHistoryTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); + + spark.sql("CREATE DATABASE IF NOT EXISTS pivot_history_test"); + ModelSupport.oafTypes.keySet().forEach(entityType -> { + try { + spark + .read() + .json( + Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/pivot_history").toURI()) + .toFile() + .getAbsolutePath()) + .write() + .mode("overwrite") + .saveAsTable("pivot_history_test." 
+ entityType); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + }); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-h", + "", + "-pivotHistoryDatabase", + "pivot_history_test" + + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + final Dataset pubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + final List merges = pubs + .filter("source == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .collectAsList(); + assertEquals(3, merges.size()); + Set dups = Sets + .newHashSet( + "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", + "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", + "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); + merges.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.MERGES, r.getRelClass()); + assertTrue(dups.contains(r.getTarget())); + }); + + final List mergedIn = pubs + .filter("target == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .collectAsList(); + assertEquals(3, mergedIn.size()); + mergedIn.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + 
assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); + assertTrue(dups.contains(r.getSource())); + }); + + assertEquals(1268, orgs_mergerel); + assertEquals(1112, pubs.count()); + assertEquals(292, sw_mergerel); + assertEquals(476, ds_mergerel); + assertEquals(742, orp_mergerel); +// System.out.println("orgs_mergerel = " + orgs_mergerel); +// System.out.println("pubs_mergerel = " + pubs_mergerel); +// System.out.println("sw_mergerel = " + sw_mergerel); +// System.out.println("ds_mergerel = " + ds_mergerel); +// System.out.println("orp_mergerel = " + orp_mergerel); + + } + @Test @Order(4) void createMergeRelsTest() throws Exception { @@ -382,7 +496,9 @@ public class SparkDedupTest implements Serializable { "-la", "lookupurl", "-w", - testOutputBasePath + testOutputBasePath, + "-h", + "" }); new SparkCreateMergeRels(parser, spark).run(isLookUpService); @@ -437,10 +553,10 @@ public class SparkDedupTest implements Serializable { }); assertEquals(1268, orgs_mergerel); - assertEquals(1450, pubs.count()); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(738, orp_mergerel); + assertEquals(1112, pubs.count()); + assertEquals(292, sw_mergerel); + assertEquals(476, ds_mergerel); + assertEquals(742, orp_mergerel); // System.out.println("orgs_mergerel = " + orgs_mergerel); // System.out.println("pubs_mergerel = " + pubs_mergerel); // System.out.println("sw_mergerel = " + sw_mergerel); @@ -492,10 +608,10 @@ public class SparkDedupTest implements Serializable { .count(); assertEquals(86, orgs_deduprecord); - assertEquals(68, pubs.count()); - assertEquals(49, sw_deduprecord); + assertEquals(91, pubs.count()); + assertEquals(47, sw_deduprecord); assertEquals(97, ds_deduprecord); - assertEquals(92, orp_deduprecord); + assertEquals(93, orp_deduprecord); verifyRoot_1(mapper, pubs); @@ -629,13 +745,13 @@ public class SparkDedupTest implements Serializable { .distinct() .count(); - assertEquals(902, publications); + assertEquals(925, 
publications); assertEquals(839, organizations); assertEquals(100, projects); assertEquals(100, datasource); - assertEquals(198, softwares); + assertEquals(196, softwares); assertEquals(389, dataset); - assertEquals(520, otherresearchproduct); + assertEquals(521, otherresearchproduct); // System.out.println("publications = " + publications); // System.out.println("organizations = " + organizations); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json index fa889d63b..ff6670f1e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json @@ -101,7 +101,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "authors", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json index b45b6ae83..a4a3761a3 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json @@ -101,7 +101,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "authors", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json index 15ebc7a6a..c3a769874 100644 --- 
a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json @@ -29,9 +29,8 @@ }, "pace": { "clustering" : [ - { "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} }, - { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } }, - { "name" : "lowercase", "fields" : [ "doi" ], "params" : { } } + { "name" : "numAuthorsTitleSuffixPrefixChain", "fields" : [ "num_authors", "title" ], "params" : { "mod" : "10" } }, + { "name" : "jsonlistclustering", "fields" : [ "pid" ], "params" : { "jpath_value": "$.value", "jpath_classid": "$.qualifier.classid"} } ], "decisionTree": { "start": { @@ -79,13 +78,37 @@ "ignoreUndefined": "false" }, "layer3": { + "fields": [ + { + "field": "authors", + "comparator": "authorsMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "surname_th": 0.75, + "fullname_th": 0.75, + "threshold": 0.6, + "mode": "full" + } + } + ], + "threshold": 0.6, + "aggregation": "MAX", + "positive": "layer4", + "negative": "NO_MATCH", + "undefined": "MATCH", + "ignoreUndefined": "true" + }, + "layer4": { "fields": [ { "field": "title", "comparator": "levensteinTitle", "weight": 1.0, "countIfUndefined": "true", - "params": {} + "params": { + "threshold": "0.99" + } } ], "threshold": 0.99, @@ -97,23 +120,25 @@ } }, "model": [ - { - "name": "doi", - "type": "String", - "path": "$.pid[?(@.qualifier.classid == 'doi')].value" - }, { "name": "pid", "type": "JSON", "path": "$.pid", "overrideMatch": "true" }, + { + "name": "alternateid", + "type": "JSON", + "path": "$.instance[*].alternateIdentifier[*]", + "overrideMatch": "true" + }, { "name": "title", "type": "String", "path": "$.title[?(@.qualifier.classid == 'main title')].value", "length": 250, - "size": 5 + "size": 5, + "clean": "title" }, { "name": "authors", @@ -122,9 +147,9 @@ "size": 
200 }, { - "name": "resulttype", + "name": "num_authors", "type": "String", - "path": "$.resulttype.classid" + "path": "$.author.length()" } ], "blacklists": { diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json index f53ff385f..3c6c8aa5f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json @@ -75,7 +75,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "url", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json new file mode 100644 index 000000000..8af1a6d06 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json @@ -0,0 +1 @@ +{"id": "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c", "firstUsage": "2022-01-01", "lastUsage": "2022-01-01", "dedupId": "50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c" } \ No newline at end of file diff --git a/pom.xml b/pom.xml index 3fd351c1d..6ef320253 100644 --- a/pom.xml +++ b/pom.xml @@ -931,5 +931,25 @@ --> + + + + arm-silicon-mac + + + aarch64 + mac + + + + + + org.xerial.snappy + snappy-java + 1.1.8.4 + + + + \ No newline at end of file From 1287315ffb546397bcbcac588fd5b80a62cab665 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Mon, 11 Dec 2023 21:26:05 +0100 Subject: [PATCH 23/57] Do no longer use dedupId information from pivotHistory Database --- .../dhp/oa/dedup/SparkCreateMergeRels.java | 85 ++++++++++--------- 1 file changed, 44 insertions(+), 41 
deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 5bb132b89..46c29494e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -1,24 +1,23 @@ package eu.dnetlib.dhp.oa.dedup; -import com.google.common.hash.Hashing; -import com.kwartile.lib.cc.ConnectedComponent; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.config.DedupConfig; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; +import static org.apache.spark.sql.functions.*; + +import java.io.IOException; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.catalyst.encoders.RowEncoder; import org.apache.spark.sql.expressions.UserDefinedFunction; import org.apache.spark.sql.expressions.Window; @@ -29,20 +28,23 @@ import 
org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; + +import com.google.common.hash.Hashing; +import com.kwartile.lib.cc.ConnectedComponent; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.pace.config.DedupConfig; import scala.Tuple3; import scala.collection.JavaConversions; -import java.io.IOException; -import java.time.LocalDate; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Optional; - -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; -import static org.apache.spark.sql.functions.*; - public class SparkCreateMergeRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkCreateMergeRels.class); @@ -121,6 +123,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .distinct() .withColumn("vertexId", hashUDF.apply(functions.col("id"))); + // transform simrels into pairs of numeric ids final Dataset edges = spark .read() .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) @@ -128,27 +131,34 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .withColumn("source", hashUDF.apply(functions.col("source"))) .withColumn("target", hashUDF.apply(functions.col("target"))); + // resolve connected components + // ("vertexId", "groupId") Dataset cliques = ConnectedComponent .runOnPairs(edges, 50, spark); + // transform 
"vertexId" back to its original string value + // groupId is kept numeric as its string value is not used + // ("id", "groupId") Dataset rawMergeRels = cliques .join(vertexIdMap, JavaConversions.asScalaBuffer(Collections.singletonList("vertexId")), "inner") .drop("vertexId") .distinct(); + // empty dataframe if historydatabase is not used Dataset pivotHistory = spark .createDataset( Collections.emptyList(), RowEncoder - .apply(StructType.fromDDL("id STRING, firstUsage STRING, lastUsage STRING, dedupId STRING"))); + .apply(StructType.fromDDL("id STRING, lastUsage STRING"))); if (StringUtils.isNotBlank(pivotHistoryDatabase)) { pivotHistory = spark .read() .table(pivotHistoryDatabase + "." + subEntity) - .selectExpr("id", "lastUsage", "dedupId"); + .selectExpr("id", "lastUsage"); } + // depending on resulttype collectefrom and dateofacceptance are evaluated differently String collectedfromExpr = "false AS collectedfrom"; String dateExpr = "'' AS date"; @@ -164,8 +174,10 @@ public class SparkCreateMergeRels extends AbstractSparkAction { dateExpr = "dateofacceptance.value AS date"; } + // cap pidType at w3id as from there on they are considered equal UserDefinedFunction mapPid = udf( (String s) -> Math.min(PidType.tryValueOf(s).ordinal(), PidType.w3id.ordinal()), DataTypes.IntegerType); + UserDefinedFunction validDate = udf((String date) -> { if (StringUtils.isNotBlank(date) && date.matches(DatePicker.DATE_PATTERN) && DatePicker.inRange(date)) { @@ -186,8 +198,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .withColumn("pidType", mapPid.apply(col("pidType"))) // ordinal of pid type .withColumn("date", validDate.apply(col("date"))); - UserDefinedFunction generateDedupId = udf((String s) -> IdGenerator.generate(s), DataTypes.StringType); - // ordering to selected pivot id WindowSpec w = Window .partitionBy("groupId") @@ -202,17 +212,15 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .join(pivotHistory, 
JavaConversions.asScalaBuffer(Collections.singletonList("id")), "full") .join(pivotingData, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") .withColumn("pivot", functions.first("id").over(w)) - .withColumn("pivotDedupId", functions.first("dedupId").over(w)) .withColumn("position", functions.row_number().over(w)) - .filter(cut > 0 ? col("position").lt(lit(cut)) : lit(true)) - // .select("id", "groupId", "collectedfrom", "pivot", "dedupId", "pivotDedupId") - // .distinct() + .filter(cut > 0 ? col("position").lt(lit(cut)) : lit(true)) // apply cut after choosing pivot .flatMap( (FlatMapFunction>) (Row r) -> { String id = r.getAs("id"); + String dedupId = IdGenerator.generate(id); + String pivot = r.getAs("pivot"); - String pivotDedupId = r.getAs("pivotDedupId"); // dedupId associated with the pivot - String dedupId = r.getAs("dedupId"); // dedupId associated with this id if it was a pivot + String pivotDedupId = IdGenerator.generate(pivot); // filter out id == pivotDedupId // those are caused by claim expressed on pivotDedupId @@ -233,14 +241,9 @@ public class SparkCreateMergeRels extends AbstractSparkAction { return res.iterator(); } - // new pivot, assign pivotDedupId with current IdGenerator - if (StringUtils.isBlank(pivotDedupId)) { - pivotDedupId = IdGenerator.generate(pivot); - } - - // this was a pivot in a preceding graph but it has been merged into a new group with different + // this was a pivot in a previous graph but it has been merged into a new group with different // pivot - if (StringUtils.isNotBlank(dedupId) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { + if (!r.isNullAt(r.fieldIndex("lastUsage")) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { // materialize the previous dedup record as a merge relation with the new one res.add(new Tuple3<>(dedupId, pivotDedupId, null)); } From 831cc1fddececffc80701931ad9dab4d9926192b Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 14 Dec 2023 11:51:02 +0100 
Subject: [PATCH 24/57] Generate "merged" dedup id relations also for records that are filtered out by the cut parameters --- .../java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 46c29494e..191870d3b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -213,7 +213,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .join(pivotingData, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") .withColumn("pivot", functions.first("id").over(w)) .withColumn("position", functions.row_number().over(w)) - .filter(cut > 0 ? col("position").lt(lit(cut)) : lit(true)) // apply cut after choosing pivot .flatMap( (FlatMapFunction>) (Row r) -> { String id = r.getAs("id"); @@ -249,7 +248,9 @@ public class SparkCreateMergeRels extends AbstractSparkAction { } // add merge relations - res.add(new Tuple3<>(id, pivotDedupId, pivot)); + if (cut <=0 || r.getAs("position") <= cut) { + res.add(new Tuple3<>(id, pivotDedupId, pivot)); + } return res.iterator(); }, Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.STRING())) From 10e135db1eb26cf6383d02f2318c8e6701631553 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 22 Dec 2023 09:55:10 +0100 Subject: [PATCH 25/57] Use dedup_wf_002 in place of dedup_wf_001 to make explicit a different algorithm has been used to generate those kind of ids --- .../src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index 37e1bfd15..1d3d4afdd 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -40,7 +40,7 @@ public class IdGenerator implements Serializable { if (PidType.isValid(pidType)) { return prefix + "|" + dedupify(ns) + "::" + suffix; } else { - return prefix + "|dedup_wf_001::" + md5(originalId); // hash the whole originalId to avoid collisions + return prefix + "|dedup_wf_002::" + md5(originalId); // hash the whole originalId to avoid collisions } } From 3c66e3bd7bd7fbe14f068b5176ae3681e941fda9 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 22 Dec 2023 09:57:30 +0100 Subject: [PATCH 26/57] Create dedup record for "merged" pivots Do not create dedup records for group that have more than 20 different acceptance date --- .../dhp/oa/dedup/DedupRecordFactory.java | 255 +++++++++++------- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 4 +- 2 files changed, 158 insertions(+), 101 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index d9fb24078..4c12d1dc6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,130 +1,187 @@ package eu.dnetlib.dhp.oa.dedup; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.ReduceFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import 
org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; +import org.apache.commons.beanutils.BeanUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.ReduceFunction; +import org.apache.spark.sql.*; import scala.Tuple2; +import scala.Tuple3; +import scala.collection.JavaConversions; + +import java.util.*; +import java.util.stream.Stream; public class DedupRecordFactory { + public static final class DedupRecordReduceState { + public final String dedupId; - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + public final ArrayList aliases = new ArrayList<>(); - private DedupRecordFactory() { - } + public final HashSet acceptanceDate = new HashSet<>(); - public static Dataset createDedupRecord( - final SparkSession spark, - final DataInfo dataInfo, - final String mergeRelsInputPath, - final String entitiesInputPath, - final Class clazz) { + public OafEntity entity; - long ts = System.currentTimeMillis(); + public DedupRecordReduceState(String dedupId, String id, OafEntity entity) { + this.dedupId = dedupId; + this.entity = entity; + if (entity == null) { + aliases.add(id); + } else { + if (Result.class.isAssignableFrom(entity.getClass())) { + Result result = (Result) entity; + if (result.getDateofacceptance() != null && 
StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { + acceptanceDate.add(result.getDateofacceptance().getValue()); + } + } + } + } - // - Dataset entities = spark - .read() - .schema(Encoders.bean(clazz).schema()) - .json(entitiesInputPath) - .as(Encoders.bean(clazz)) - .map( - (MapFunction>) entity -> { - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - .selectExpr("_1 AS id", "_2 AS kryoObject"); + public String getDedupId() { + return dedupId; + } + } + private static final int MAX_ACCEPTANCE_DATE = 20; - // : source is the dedup_id, target is the id of the mergedIn - Dataset mergeRels = spark - .read() - .load(mergeRelsInputPath) - .where("relClass == 'merges'") - .selectExpr("source as dedupId", "target as id"); + private DedupRecordFactory() { + } - return mergeRels - .join(entities, "id") - .select("dedupId", "kryoObject") - .as(Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - .groupByKey((MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .reduceGroups( - (ReduceFunction>) (t1, t2) -> new Tuple2<>(t1._1(), - reduceEntity(t1._1(), t1._2(), t2._2(), clazz))) - .map( - (MapFunction>, T>) t -> { - T res = t._2()._2(); - res.setDataInfo(dataInfo); - res.setLastupdatetimestamp(ts); - return res; - }, - Encoders.bean(clazz)); - } + public static Dataset createDedupRecord( + final SparkSession spark, + final DataInfo dataInfo, + final String mergeRelsInputPath, + final String entitiesInputPath, + final Class clazz) { - public static T reduceEntity( - String id, T entity, T duplicate, Class clazz) { + final long ts = System.currentTimeMillis(); + final Encoder beanEncoder = Encoders.bean(clazz); + final Encoder kryoEncoder = Encoders.kryo(clazz); - int compare = new IdentifierComparator() - .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); + // + Dataset entities = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(entitiesInputPath) + 
.as(beanEncoder) + .map( + (MapFunction>) entity -> { + return new Tuple2<>(entity.getId(), entity); + }, + Encoders.tuple(Encoders.STRING(), kryoEncoder)) + .selectExpr("_1 AS id", "_2 AS kryoObject"); - if (compare > 0) { - T swap = duplicate; - duplicate = entity; - entity = swap; + // : source is the dedup_id, target is the id of the mergedIn + Dataset mergeRels = spark + .read() + .load(mergeRelsInputPath) + .where("relClass == 'merges'") + .selectExpr("source as dedupId", "target as id"); + + return mergeRels + .join(entities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .select("dedupId", "id", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) + .map((MapFunction, DedupRecordReduceState>) t -> new DedupRecordReduceState(t._1(), t._2(), t._3()), Encoders.kryo(DedupRecordReduceState.class)) + .groupByKey((MapFunction) DedupRecordReduceState::getDedupId, Encoders.STRING()) + .reduceGroups( + (ReduceFunction) (t1, t2) -> { + if (t1.entity == null) { + t2.aliases.addAll(t1.aliases); + return t2; + } + if (t1.acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { + t1.acceptanceDate.addAll(t2.acceptanceDate); + } + t1.aliases.addAll(t2.aliases); + t1.entity = reduceEntity(t1.entity, t2.entity); + + return t1; + } + ) + .flatMap + ((FlatMapFunction, OafEntity>) t -> { + String dedupId = t._1(); + DedupRecordReduceState agg = t._2(); + + if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) { + return Collections.emptyIterator(); + } + + return Stream.concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) + .map(id -> { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity); + res.setId(id); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + }).iterator(); + }, beanEncoder); + } + + private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { + + if (duplicate == null) { + return 
entity; } - entity.mergeFrom(duplicate); - entity.setId(id); - if (ModelSupport.isSubClass(duplicate, Result.class)) { - Result re = (Result) entity; - Result rd = (Result) duplicate; + int compare = new IdentifierComparator<>() + .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); - List> authors = new ArrayList<>(); - if (re.getAuthor() != null) { - authors.add(re.getAuthor()); - } - if (rd.getAuthor() != null) { - authors.add(rd.getAuthor()); - } + if (compare > 0) { + OafEntity swap = duplicate; + duplicate = entity; + entity = swap; + } - re.setAuthor(AuthorMerger.merge(authors)); - } + entity.mergeFrom(duplicate); - return entity; - } + if (ModelSupport.isSubClass(duplicate, Result.class)) { + Result re = (Result) entity; + Result rd = (Result) duplicate; - public static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) - throws IllegalAccessException, InstantiationException, InvocationTargetException { - T base = entities.next()._2(); + List> authors = new ArrayList<>(); + if (re.getAuthor() != null) { + authors.add(re.getAuthor()); + } + if (rd.getAuthor() != null) { + authors.add(rd.getAuthor()); + } - while (entities.hasNext()) { - T duplicate = entities.next()._2(); - if (duplicate != null) - base = reduceEntity(id, base, duplicate, clazz); - } + re.setAuthor(AuthorMerger.merge(authors)); + } - base.setDataInfo(dataInfo); - base.setLastupdatetimestamp(ts); + return entity; + } - return base; - } + public static T entityMerger( + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) { + T base = entities.next()._2(); + + while (entities.hasNext()) { + T duplicate = entities.next()._2(); + if (duplicate != null) + base = (T) reduceEntity(base, duplicate); + } + + base.setId(id); + base.setDataInfo(dataInfo); + base.setLastupdatetimestamp(ts); + + return base; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java 
b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index bd5a04e62..8b3480e60 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -611,7 +611,7 @@ public class SparkDedupTest implements Serializable { assertEquals(91, pubs.count()); assertEquals(47, sw_deduprecord); assertEquals(97, ds_deduprecord); - assertEquals(93, orp_deduprecord); + assertEquals(92, orp_deduprecord); verifyRoot_1(mapper, pubs); @@ -751,7 +751,7 @@ public class SparkDedupTest implements Serializable { assertEquals(100, datasource); assertEquals(196, softwares); assertEquals(389, dataset); - assertEquals(521, otherresearchproduct); + assertEquals(520, otherresearchproduct); // System.out.println("publications = " + publications); // System.out.println("organizations = " + organizations); From 2753044d13da0465b8b9061e70252ed6ac69a325 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 11 Jan 2024 16:28:26 +0100 Subject: [PATCH 27/57] refined mapping for the extraction of the original resource type --- .../dhp/oa/graph/raw/OafToOafMapper.java | 30 +++++++++++++++++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 19 ++++++++---- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index a63296d18..eee518353 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import 
org.dom4j.Document; import org.dom4j.Element; @@ -27,6 +28,15 @@ import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; public class OafToOafMapper extends AbstractMdRecordToOafMapper { + private static Set DC_TYPE_PUBLICATION_VERSION = new HashSet<>(); + + static { + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/submittedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/acceptedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/publishedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/updatedVersion"); + } + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, final boolean forceOrginalId) { super(vocs, invisible, shouldHashId, forceOrginalId); @@ -192,24 +202,40 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { /** * The Dublin Core element dc:type can be repeated, but we need to base our mapping on a single value * So this method tries to give precedence to the COAR resource type, when available. 
Otherwise, it looks for the - * openaire's info:eu-repo type, and as last resort picks the 1st type text available + * openaire's info:eu-repo type, but excluding the following + * + * info:eu-repo/semantics/draft + * info:eu-repo/semantics/submittedVersion + * info:eu-repo/semantics/acceptedVersion + * info:eu-repo/semantics/publishedVersion + * info:eu-repo/semantics/updatedVersion + * + * Then, it picks the 1st dc:type text available and, in case there is no dc:type element, as last resort it tries + * to extract the type from the dr:CobjCategory element + * + * Examples: * * http://purl.org/coar/resource_type/c_5794 * info:eu-repo/semantics/article * Conference article + * 0006 * * @param doc the input document * @return the chosen resource type */ @Override protected String findOriginalType(Document doc) { - return (String) doc + final String dcType = (String) doc .selectNodes("//dc:type") .stream() .map(o -> "" + ((Node) o).getText().trim()) + .filter(t -> !DC_TYPE_PUBLICATION_VERSION.contains(t)) .sorted(new OriginalTypeComparator()) .findFirst() .orElse(null); + + final String drCobjCategory = doc.valueOf("//dr:CobjCategory/text()"); + return ObjectUtils.firstNonNull(dcType, drCobjCategory); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index e63b01a00..08529125c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -221,27 +221,36 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } /** - * The Datacite element + * Extracts the resource type from The Datacite element * - * journal article + * journal article * * @param doc the input document * @return the chosen resource type */ @Override protected String 
findOriginalType(Document doc) { - return Optional + final String resourceType = Optional .ofNullable( (Element) doc .selectSingleNode( "//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']")) .map(element -> { - final String resourceTypeURI = element.attributeValue("anyURI"); + final String resourceTypeURI = element.attributeValue("uri"); + final String resourceTypeAnyURI = element.attributeValue("anyURI"); final String resourceTypeTxt = element.getText(); + final String resourceTypeGeneral = element.attributeValue("resourceTypeGeneral"); - return ObjectUtils.firstNonNull(resourceTypeURI, resourceTypeTxt); + return ObjectUtils + .firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral); }) .orElse(null); + + final String drCobjCategory = doc.valueOf("//dr:CobjCategory/text()"); + return ObjectUtils.firstNonNull(resourceType, drCobjCategory); } @Override From f61212593975db113209f23ca17deb69075a7446 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Jan 2024 10:20:28 +0100 Subject: [PATCH 28/57] fix issue on FoS integration. 
Removing the null values from FoS --- .../PrepareFOSSparkJob.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 57ad8b96a..b1ffe7f37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -124,8 +124,19 @@ public class PrepareFOSSparkJob implements Serializable { FOSDataModel first) { level1.add(first.getLevel1()); level2.add(first.getLevel2()); - level3.add(first.getLevel3() + "@@" + first.getScoreL3()); - level4.add(first.getLevel4() + "@@" + first.getScoreL4()); + if (Optional.ofNullable(first.getLevel3()).isPresent() && + !first.getLevel3().equalsIgnoreCase(NA) && !first.getLevel3().equalsIgnoreCase(NULL) + && first.getLevel3() != null) + level3.add(first.getLevel3() + "@@" + first.getScoreL3()); + else + level3.add(NULL); + if (Optional.ofNullable(first.getLevel4()).isPresent() && + !first.getLevel4().equalsIgnoreCase(NA) && + !first.getLevel4().equalsIgnoreCase(NULL) && + first.getLevel4() != null) + level4.add(first.getLevel4() + "@@" + first.getScoreL4()); + else + level4.add(NULL); } } From 21a14fcd800944d2a7fca1c70ad77726536f2b97 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Mon, 15 Jan 2024 00:08:07 +0100 Subject: [PATCH 29/57] Reusable RunSQLSparkJob for executing SQL in Spark through Oozie Spark Actions Implements pivots table update oozie workflow --- .../eu/dnetlib/dhp/oozie/RunSQLSparkJob.java | 75 +++++++++++++++ .../dnetlib/dhp/oozie/run_sql_parameters.json | 20 ++++ .../pivothistory/oozie_app/config-default.xml | 26 +++++ 
.../oa/dedup/pivothistory/oozie_app/sql.sql | 62 ++++++++++++ .../dedup/pivothistory/oozie_app/workflow.xml | 95 +++++++++++++++++++ 5 files changed, 278 insertions(+) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java create mode 100644 dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java new file mode 100644 index 000000000..ef296bfc9 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java @@ -0,0 +1,75 @@ + +package eu.dnetlib.dhp.oozie; + +import com.google.common.io.Resources; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.commons.text.StringSubstitutor; +import org.apache.spark.SparkConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +public class RunSQLSparkJob { + private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class); + + private final ArgumentApplicationParser parser; + + public RunSQLSparkJob(ArgumentApplicationParser parser) { + this.parser = parser; + } + + public static void main(String[] args) throws Exception { + + Map params = new HashMap<>(); + for (int i = 0; i < args.length - 
1; i++) { + if (args[i].startsWith("--")) { + params.put(args[i].substring(2), args[++i]); + } + } + + /* + * String jsonConfiguration = IOUtils .toString( Objects .requireNonNull( RunSQLSparkJob.class + * .getResourceAsStream( "/eu/dnetlib/dhp/oozie/run_sql_parameters.json"))); final ArgumentApplicationParser + * parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); + */ + + Boolean isSparkSessionManaged = Optional + .ofNullable(params.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + URL url = com.google.common.io.Resources.getResource(params.get("sql")); + String raw_sql = Resources.toString(url, StandardCharsets.UTF_8); + + String sql = StringSubstitutor.replace(raw_sql, params); + log.info("sql: {}", sql); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", params.get("hiveMetastoreUris")); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> { + for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) { + log.info("executing: {}", statement); + long startTime = System.currentTimeMillis(); + spark.sql(statement).show(); + log + .info( + "executed in {}", + DurationFormatUtils.formatDuration(System.currentTimeMillis() - startTime, "HH:mm:ss.S")); + } + }); + } + +} diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json new file mode 100644 index 000000000..355f38e2f --- /dev/null +++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "hmu", + "paramLongName": "hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + 
"paramRequired": true + }, + { + "paramName": "sql", + "paramLongName": "sql", + "paramDescription": "sql script to execute", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml new file mode 100644 index 000000000..17bb70647 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml @@ -0,0 +1,26 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + sparkSqlWarehouseDir + /user/hive/warehouse + + \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql new file mode 100644 index 000000000..86dbda1c9 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql @@ -0,0 +1,62 @@ + +CREATE TABLE `${pivot_history_db}`.`dataset_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`dataset` ON relation.source = dataset.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`dataset` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE 
`${pivot_history_db}`.`publication_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`publication` ON relation.source = publication.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`publication` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`software_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`software` ON relation.source = software.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`software` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`otherresearchproduct_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`otherresearchproduct` ON relation.source = otherresearchproduct.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`otherresearchproduct` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ + + +DROP TABLE IF EXISTS `${pivot_history_db}`.`dataset_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`dataset` RENAME TO 
`${pivot_history_db}`.`dataset_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`dataset_new` RENAME TO `${pivot_history_db}`.`dataset`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`publication_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`publication` RENAME TO `${pivot_history_db}`.`publication_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`publication_new` RENAME TO `${pivot_history_db}`.`publication`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`software_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`software` RENAME TO `${pivot_history_db}`.`software_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`software_new` RENAME TO `${pivot_history_db}`.`software`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`otherresearchproduct_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`otherresearchproduct` RENAME TO `${pivot_history_db}`.`otherresearchproduct_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`otherresearchproduct_new` RENAME TO `${pivot_history_db}`.`otherresearchproduct`; /*EOS*/ diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml new file mode 100644 index 000000000..d562f088e --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml @@ -0,0 +1,95 @@ + + + + + pivot_history_db + + Pivot history DB on hive + + + new_graph_db + + New graph DB on hive + + + new_graph_date + + Creation date of new graph db + + + + + hiveMetastoreUris + hive server metastore URIs + + + sparkSqlWarehouseDir + + + + sparkClusterOpts + --conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf 
spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory + spark cluster-wide options + + + sparkResourceOpts + --executor-memory=3G --conf spark.executor.memoryOverhead=3G --executor-cores=6 --driver-memory=8G --driver-cores=4 + spark resource options + + + sparkApplicationOpts + --conf spark.sql.shuffle.partitions=3840 + spark resource options + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Upgrade Pivot History + eu.dnetlib.dhp.oozie.RunSQLSparkJob + dhp-dedup-openaire-${projectVersion}.jar + + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + ${sparkClusterOpts} + ${sparkResourceOpts} + ${sparkApplicationOpts} + + --hiveMetastoreUris${hiveMetastoreUris} + --sqleu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql + --pivot_history_db${pivot_history_db} + --new_graph_db${new_graph_db} + --new_graph_date${new_graph_date} + + + + + + + \ No newline at end of file From 59eaccbd87197095c50c902458bf84777932e51f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 15 Jan 2024 17:49:54 +0100 Subject: [PATCH 30/57] [enrichment single step] refactoring to fix issue in disappeared result type --- .../main/java/eu/dnetlib/dhp/MoveResult.java | 84 +++++++++++++++++++ ...kResultToCommunityFromOrganizationJob.java | 66 +++++++++------ .../SparkResultToCommunityFromProject.java | 41 ++++----- .../eu/dnetlib/dhp/wf/main/job.properties | 6 +- .../dhp/wf/main/oozie_app/workflow.xml | 2 +- .../input_moveresult_parameters.json | 22 +++++ .../oozie_app/workflow.xml | 29 ++++++- .../oozie_app/workflow.xml | 29 ++++++- 8 files changed, 225 insertions(+), 54 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java create mode 
100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java new file mode 100644 index 000000000..5ffcf8d3f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp; + +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; +import static eu.dnetlib.dhp.PropagationConstant.readPath; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Result; + +/** + * @author miriam.baglioni + * @Date 15/01/24 + */ +public class MoveResult implements Serializable { + private static final Logger log = LoggerFactory.getLogger(MoveResult.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkResultToCommunityFromOrganizationJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("sourcePath"); + 
log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + moveResults(spark, inputPath, outputPath); + + }); + } + + public static void moveResults(SparkSession spark, String inputPath, String outputPath) { + + ModelSupport.entityTypes + .keySet() + .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + // .parallelStream() + .forEach(e -> { + Class resultClazz = ModelSupport.entityTypes.get(e); + Dataset resultDataset = readPath(spark, inputPath + e.name(), resultClazz); + if (resultDataset.count() > 0) { + + resultDataset + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + } + + }); + + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index cc87b80e5..4f755266a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -76,29 +76,41 @@ public class SparkResultToCommunityFromOrganizationJob { ModelSupport.entityTypes .keySet() .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + // .parallelStream() .forEach(e -> { - if (ModelSupport.isResult(e)) { - Class resultClazz = ModelSupport.entityTypes.get(e); - removeOutputDir(spark, outputPath + e.name()); - Dataset result = readPath(spark, inputPath + e.name(), resultClazz); + // if () { + Class resultClazz = ModelSupport.entityTypes.get(e); + removeOutputDir(spark, outputPath + 
e.name()); + Dataset result = readPath(spark, inputPath + e.name(), resultClazz); - result - .joinWith( - possibleUpdates, - result.col("id").equalTo(possibleUpdates.col("resultId")), - "left_outer") - .map(resultCommunityFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + e.name()); + log.info("executing left join"); + result + .joinWith( + possibleUpdates, + result.col("id").equalTo(possibleUpdates.col("resultId")), + "left_outer") + .map(resultCommunityFn(), Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); - readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath + e.name()); - } +// log +// .info( +// "reading results from " + outputPath + e.name() + " and copying them to " + inputPath +// + e.name()); +// Dataset tmp = readPath(spark, outputPath + e.name(), resultClazz); +// if (tmp.count() > 0){ +// +// tmp +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(inputPath + e.name()); +// } + + // } }); } @@ -115,11 +127,11 @@ public class SparkResultToCommunityFromOrganizationJob { .map(Context::getId) .collect(Collectors.toList()); - @SuppressWarnings("unchecked") - R res = (R) ret.getClass().newInstance(); + // @SuppressWarnings("unchecked") + // R res = (R) ret.getClass().newInstance(); - res.setId(ret.getId()); - List propagatedContexts = new ArrayList<>(); + // res.setId(ret.getId()); + // List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { if (!contextList.contains(cId)) { Context newContext = new Context(); @@ -133,11 +145,11 @@ public class SparkResultToCommunityFromOrganizationJob { PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS))); - 
propagatedContexts.add(newContext); + ret.getContext().add(newContext); } } - res.setContext(propagatedContexts); - ret.mergeFrom(res); + // res.setContext(propagatedContexts); + // ret.mergeFrom(res); } return ret; }; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index dde534061..bb712d878 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -86,29 +86,30 @@ public class SparkResultToCommunityFromProject implements Serializable { ModelSupport.entityTypes .keySet() .parallelStream() + .filter(e -> ModelSupport.isResult(e)) .forEach(e -> { - if (ModelSupport.isResult(e)) { - removeOutputDir(spark, outputPath + e.name()); - Class resultClazz = ModelSupport.entityTypes.get(e); - Dataset result = readPath(spark, inputPath + e.name(), resultClazz); + // if () { + removeOutputDir(spark, outputPath + e.name()); + Class resultClazz = ModelSupport.entityTypes.get(e); + Dataset result = readPath(spark, inputPath + e.name(), resultClazz); - result - .joinWith( - possibleUpdates, - result.col("id").equalTo(possibleUpdates.col("resultId")), - "left_outer") - .map(resultCommunityFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + e.name()); + result + .joinWith( + possibleUpdates, + result.col("id").equalTo(possibleUpdates.col("resultId")), + "left_outer") + .map(resultCommunityFn(), Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); - readPath(spark, outputPath + e.name(), resultClazz) - 
.write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath + e.name()); - } + readPath(spark, outputPath + e.name(), resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); + // } }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 4cb759343..a84e8ab6b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,12 +1,12 @@ -sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=CountryPropagation +sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched +resumeFrom=CommunityOrganization allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo 
datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48 #allowedtypes=pubsrepository::institutional allowedtypes=Institutional -outputPath=/tmp/miriam/enrichment_one_step +outputPath=/tmp/beta_provision/graph/11_graph_orcid pathMap ={"author":"$['author'][*]['fullname']", \ "title":"$['title'][*]['value']",\ "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 8e91707b6..9b7fad325 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -231,7 +231,7 @@ - + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json new file mode 100644 index 000000000..4645be435 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json @@ -0,0 +1,22 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index 6aeffb457..18c5f4f0f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -69,7 +69,7 @@ yarn cluster - community2resultfromorganization-Publication + community2resultfromorganization eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob dhp-enrichment-${projectVersion}.jar @@ -88,6 +88,33 @@ --sourcePath${sourcePath}/ --outputPath${workingDir}/communityorganization/resulttocommunityfromorganization/ + + + + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + 
--executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communityorganization/resulttocommunityfromorganization/ + --outputPath${sourcePath}/ + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml index dd845064b..01e366c02 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -86,12 +86,37 @@ --sourcePath${sourcePath}/ --outputPath${workingDir}/communitythroughproject/ + + + + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitythroughproject/ + 
--outputPath${sourcePath}/ + + - - \ No newline at end of file From 67ce2d54be4019d3d1aa157cbc1d50eb03f1ea59 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 17 Jan 2024 16:50:00 +0100 Subject: [PATCH 31/57] [enrichment single step] refactoring to fix issues in disappeared result type --- .../SparkCountryPropagationJob.java | 6 -- .../SparkResultToCommunityFromProject.java | 10 +- ...parkResultToCommunityThroughSemRelJob.java | 21 +---- .../eu/dnetlib/dhp/wf/main/job.properties | 4 +- .../dhp/wf/main/oozie_app/workflow.xml | 2 +- .../bulktag/oozie_app/config-default.xml | 12 ++- .../bulktag/oozie_app/workflow.xml | 18 +++- .../oozie_app/config-default.xml | 4 +- .../countrypropagation/oozie_app/workflow.xml | 92 ++++++++++++++----- .../oozie_app/workflow.xml | 15 ++- .../oozie_app/workflow.xml | 20 ++-- .../projecttoresult/oozie_app/workflow.xml | 15 ++- .../input_moveresult_parameters.json | 0 .../oozie_app/workflow.xml | 14 ++- .../oozie_app/workflow.xml | 16 +++- .../oozie_app/workflow.xml | 49 +++++++++- .../oozie_app/workflow.xml | 14 ++- 17 files changed, 229 insertions(+), 83 deletions(-) rename dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/{ => resulttocommunityfromorganization}/input_moveresult_parameters.json (100%) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 92930c18b..a0cc4c84a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -97,12 +97,6 @@ public class SparkCountryPropagationJob { .mode(SaveMode.Overwrite) .json(outputPath); - readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", 
"gzip") - .json(sourcePath); - } private static MapFunction, R> getCountryMergeFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index bb712d878..f9c36d7ca 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -104,11 +104,11 @@ public class SparkResultToCommunityFromProject implements Serializable { .option("compression", "gzip") .json(outputPath + e.name()); - readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath + e.name()); +// readPath(spark, outputPath + e.name(), resultClazz) +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(inputPath + e.name()); // } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 4929c7582..3cf2f73c3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -101,11 +101,6 @@ public class SparkResultToCommunityThroughSemRelJob { .option("compression", "gzip") .json(outputPath); - readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath); } private static 
MapFunction, R> contextUpdaterFn() { @@ -115,11 +110,11 @@ public class SparkResultToCommunityThroughSemRelJob { if (rcl.isPresent()) { Set contexts = new HashSet<>(); ret.getContext().forEach(c -> contexts.add(c.getId())); - List contextList = rcl + rcl .get() .getCommunityList() .stream() - .map( + .forEach( c -> { if (!contexts.contains(c)) { Context newContext = new Context(); @@ -133,19 +128,11 @@ public class SparkResultToCommunityThroughSemRelJob { PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS))); - return newContext; + ret.getContext().add(newContext); } - return null; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - @SuppressWarnings("unchecked") - R r = (R) ret.getClass().newInstance(); + }); - r.setId(ret.getId()); - r.setContext(contextList); - ret.mergeFrom(r); } return ret; diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index a84e8ab6b..7e82d9b2c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,12 +1,12 @@ sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched -resumeFrom=CommunityOrganization +resumeFrom=CommunitySemanticRelation allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo 
datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48 #allowedtypes=pubsrepository::institutional allowedtypes=Institutional -outputPath=/tmp/beta_provision/graph/11_graph_orcid +outputPath=/tmp/miriam/graph/11_graph_orcid pathMap ={"author":"$['author'][*]['fullname']", \ "title":"$['title'][*]['value']",\ "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 9b7fad325..8e91707b6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -231,7 +231,7 @@ - + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml index fe82ae194..2695253e6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml @@ -45,10 +45,18 @@ sparkExecutorMemory - 6G + 5G sparkExecutorCores - 1 + 4 + + + memoryOverhead + 3G + + + partitions + 3284 \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index 6c5163448..c7a9e8a26 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -12,6 +12,10 @@ baseURL The URL to access the community APIs + + startFrom> + undelete + @@ -26,12 +30,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('startFrom') eq 'undelete'} + + + + + @@ -45,7 +57,7 @@ yarn-cluster cluster - bulkTagging-publication + bulkTagging eu.dnetlib.dhp.bulktag.SparkBulkTagJob dhp-enrichment-${projectVersion}.jar @@ -53,6 +65,8 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} + --conf spark.executor.memoryOverhead=${memoryOverhead} + --conf spark.sql.shuffle.partitions=${partitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml index 2744ea92b..1cb0b8a5e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml @@ -45,11 +45,11 @@ sparkExecutorMemory - 6G + 5G sparkExecutorCores - 1 + 4 spark2MaxExecutors diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 81d6dc3dc..3a6e3edfb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -12,6 +12,10 @@ allowedtypes the allowed types + + startFrom + undelete + @@ -25,7 +29,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -61,7 +73,7 @@ --sourcePath${sourcePath} --whitelist${whitelist} --allowedtypes${allowedtypes} - --outputPath${workingDir}/preparedInfo + --outputPath${workingDir}/country/preparedInfo @@ -95,10 +107,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --outputPath${workingDir}/publication - --workingPath${workingDir}/workingP + --outputPath${workingDir}/country/publication + --workingPath${workingDir}/country/workingP --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --preparedInfoPath${workingDir}/preparedInfo + 
--preparedInfoPath${workingDir}/country/preparedInfo @@ -125,10 +137,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --outputPath${workingDir}/dataset - --workingPath${workingDir}/workingD + --outputPath${workingDir}/country/dataset + --workingPath${workingDir}/country/workingD --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo @@ -155,10 +167,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --outputPath${workingDir}/otherresearchproduct - --workingPath${workingDir}/workingO + --outputPath${workingDir}/country/otherresearchproduct + --workingPath${workingDir}/country/workingO --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo @@ -185,10 +197,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --outputPath${workingDir}/software - --workingPath${workingDir}/workingS + --outputPath${workingDir}/country/software + --workingPath${workingDir}/country/workingS --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo @@ -224,9 +236,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --preparedInfoPath${workingDir}/publication + --preparedInfoPath${workingDir}/country/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/country/publication + --outputPath${workingDir}/country/country/publication @@ -253,9 +265,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --preparedInfoPath${workingDir}/dataset + --preparedInfoPath${workingDir}/country/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/country/dataset + 
--outputPath${workingDir}/country/country/dataset @@ -282,9 +294,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --preparedInfoPath${workingDir}/otherresearchproduct + --preparedInfoPath${workingDir}/country/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/country/otherresearchproduct + --outputPath${workingDir}/country/country/otherresearchproduct @@ -311,15 +323,49 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --preparedInfoPath${workingDir}/software + --preparedInfoPath${workingDir}/country/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/country/software + --outputPath${workingDir}/country/country/software - + + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/country/country/ + --outputPath${sourcePath}/ + + + + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml index 05824d209..ecec3579b 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -4,7 +4,10 @@ sourcePath the source path - + + startFrom + undelete + @@ -18,7 +21,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 483a805b1..bab1e55df 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -114,7 +114,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -142,7 +142,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -170,7 +170,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -198,7 +198,7 @@ 
--sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -225,8 +225,8 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath${workingDir}/orcidprop - --outputPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${workingDir}/orcid/orcidprop + --outputPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc @@ -261,7 +261,7 @@ --conf spark.hadoop.mapreduce.reduce.speculative=false --conf spark.sql.shuffle.partitions=3840 - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication @@ -291,7 +291,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset @@ -321,7 +321,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct @@ -351,7 +351,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + 
--possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index f0db9c777..f26f3f98b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -8,7 +8,10 @@ allowedsemrels the allowed semantics - + + startFrom + undelete + @@ -22,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index 18c5f4f0f..aa5357eea 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -8,6 +8,10 @@ baseURL the baseURL from where to reach the community APIs + + startFrom + undelete + @@ -21,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml index 01e366c02..0ceee5a7e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -8,6 +8,10 @@ baseURL the base URL to use to select the right community APIs + + startFrom + undelete + @@ -21,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -94,7 +106,7 @@ yarn cluster - community2resultfromorganization - move results + move results eu.dnetlib.dhp.MoveResult dhp-enrichment-${projectVersion}.jar diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml index 773c7fba7..b5e6fbf05 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -16,9 +16,21 @@ outputPath the 
output path + + startFrom + undelete + - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -209,9 +221,9 @@ dhp-enrichment-${projectVersion}.jar --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 + --executor-memory=4G + --conf spark.executor.memoryOverhead=5G + --conf spark.sql.shuffle.partitions=15000 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -324,7 +336,34 @@ - + + + + + yarn + cluster + move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitysemrel/ + --outputPath${sourcePath}/ + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index e963453da..ca76a0e85 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -8,6 +8,10 @@ blacklist The list of 
institutional repositories that should not be used for the propagation + + startFrom + undelete + @@ -21,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 82e9e262ee12e4cd55f1f8593893fc8e41b82a07 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 17 Jan 2024 17:38:03 +0100 Subject: [PATCH 32/57] [enrichment single step] remove parameter from execution --- .../SparkResultToProjectThroughSemRelJob.java | 29 +++++++++---------- .../eu/dnetlib/dhp/wf/main/job.properties | 2 +- .../projecttoresult/oozie_app/workflow.xml | 9 ------ 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index e7518673d..a6466716a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -64,7 +64,7 @@ public class SparkResultToProjectThroughSemRelJob { removeOutputDir(spark, outputPath); } execPropagation( - spark, outputPath, alreadyLinkedPath, potentialUpdatePath, saveGraph); + spark, outputPath, alreadyLinkedPath, potentialUpdatePath); }); } @@ -72,24 +72,23 @@ public class SparkResultToProjectThroughSemRelJob { SparkSession spark, String outputPath, String alreadyLinkedPath, - String potentialUpdatePath, - Boolean saveGraph) { + String potentialUpdatePath) { Dataset toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class); - if (saveGraph) { - toaddrelations - .joinWith( - alreadyLinked, - 
toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")), - "left_outer") - .flatMap(mapRelationRn(), Encoders.bean(Relation.class)) - .write() - .mode(SaveMode.Append) - .option("compression", "gzip") - .json(outputPath); - } + // if (saveGraph) { + toaddrelations + .joinWith( + alreadyLinked, + toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")), + "left_outer") + .flatMap(mapRelationRn(), Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + // } } private static FlatMapFunction, Relation> mapRelationRn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 7e82d9b2c..05db04090 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched -resumeFrom=CommunitySemanticRelation +resumeFrom=ResultProject allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index f26f3f98b..21cc5522f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -97,17 +97,8 @@ --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates 
--alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked - - - - - - - - - \ No newline at end of file From a12a3eb143477271c7682877c793864fc6aa442f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Jan 2024 15:18:10 +0100 Subject: [PATCH 33/57] - --- .../dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java index 5ffcf8d3f..c71ccb439 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java @@ -30,7 +30,7 @@ public class MoveResult implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - SparkResultToCommunityFromOrganizationJob.class + MoveResult.class .getResourceAsStream( "/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json")); From 6af536541d5187b6d162a456f8d8c9fa455220ad Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Jan 2024 15:35:40 +0100 Subject: [PATCH 34/57] [enrichment single step] moving parameter file in correct location --- .../input_moveresult_parameters.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/{resulttocommunityfromorganization => }/input_moveresult_parameters.json (100%) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json similarity index 100% rename from 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json From bcc0a13981c61d25c073d3b497f83e52121b066b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Jan 2024 17:39:14 +0100 Subject: [PATCH 35/57] [enrichment single step] adding element in wf definition --- .../dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index 21cc5522f..287ee4ba8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -30,7 +30,6 @@ ${wf:conf('startFrom') eq 'undelete'} - @@ -101,4 +100,5 @@ + \ No newline at end of file From c6b3401596f9e05cf980af479f156a3a10a2d9ae Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Jan 2024 10:15:39 +0100 Subject: [PATCH 36/57] increased shuffle partitions for publications in the country propagation workflow --- .../wf/subworkflows/countrypropagation/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 3a6e3edfb..b9cf69517 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -233,7 +233,7 @@ --conf spark.speculation=false --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --sourcePath${sourcePath}/publication --preparedInfoPath${workingDir}/country/publication From 2655eea5bc3075d4a649958c61971586db25452d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Jan 2024 16:28:05 +0100 Subject: [PATCH 37/57] [orcid enrichment] drop paths before copying the non-modifyed contents --- .../dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml index ce117b5e9..bbd3581c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml @@ -43,6 +43,17 @@ --graphPath${graphPath} --masteryarn + + + + + + + + + + + From 1c6db320f41882c34299e7c346d1c95d592d2644 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 22 Jan 2024 15:53:17 +0100 Subject: [PATCH 38/57] [graph provision] obtain context info from the context API instead from the ISLookUp service --- .../common/api/context/CategorySummary.java | 39 ++++++++++++++ .../api/context/CategorySummaryList.java | 7 +++ .../common/api/context/ConceptSummary.java | 52 +++++++++++++++++++ .../api/context/ConceptSummaryList.java | 7 +++ .../common/api/context/ContextSummary.java | 50 
++++++++++++++++++ .../api/context/ContextSummaryList.java | 7 +++ .../dhp/oa/provision/XmlConverterJob.java | 6 +-- .../dhp/oa/provision/utils/ContextMapper.java | 45 +++++++++++++++- .../dhp/oa/provision/oozie_app/workflow.xml | 6 ++- 9 files changed, 213 insertions(+), 6 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java new file mode 100644 index 000000000..fff28dbdf --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java @@ -0,0 +1,39 @@ + +package eu.dnetlib.dhp.common.api.context; + +public class CategorySummary { + + private String id; + + private String label; + + private boolean hasConcept; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public boolean isHasConcept() { + return hasConcept; + } + + public CategorySummary setId(final String id) { + this.id = id; + return this; + } + + public CategorySummary setLabel(final String label) { + this.label = label; + return this; + } + + public CategorySummary setHasConcept(final boolean hasConcept) { + this.hasConcept = hasConcept; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java new file mode 100644 index 000000000..7213a945a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class CategorySummaryList extends ArrayList { +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java new file mode 100644 index 000000000..a576f9a1e --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.List; + +public class ConceptSummary { + + private String id; + + private String label; + + public boolean hasSubConcept; + + private List concepts; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public List getConcepts() { + return concepts; + } + + public ConceptSummary setId(final String id) { + this.id = id; + return this; + } + + public ConceptSummary setLabel(final String label) { + this.label = label; + return this; + } + + public boolean isHasSubConcept() { + return hasSubConcept; + } + + public ConceptSummary setHasSubConcept(final boolean hasSubConcept) { + this.hasSubConcept = hasSubConcept; + return this; + } + + public ConceptSummary setConcept(final List concepts) { + this.concepts = concepts; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java new file mode 100644 index 000000000..45ccd2810 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import 
java.util.ArrayList; + +public class ConceptSummaryList extends ArrayList { +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java new file mode 100644 index 000000000..46a0d0d5a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.dhp.common.api.context; + +public class ContextSummary { + + private String id; + + private String label; + + private String type; + + private String status; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public String getType() { + return type; + } + + public String getStatus() { + return status; + } + + public ContextSummary setId(final String id) { + this.id = id; + return this; + } + + public ContextSummary setLabel(final String label) { + this.label = label; + return this; + } + + public ContextSummary setType(final String type) { + this.type = type; + return this; + } + + public ContextSummary setStatus(final String status) { + this.status = status; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java new file mode 100644 index 000000000..618600007 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class ContextSummaryList extends ArrayList { +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index 518f41120..6f43ca3f7 100644 --- 
a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -62,8 +62,8 @@ public class XmlConverterJob { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String isLookupUrl = parser.get("isLookupUrl"); - log.info("isLookupUrl: {}", isLookupUrl); + final String contextApiBaseUrl = parser.get("contextApiBaseUrl"); + log.info("contextApiBaseUrl: {}", contextApiBaseUrl); final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); @@ -71,7 +71,7 @@ public class XmlConverterJob { runWithSparkSession(conf, isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromAPI(contextApiBaseUrl)); }); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index bcaf40603..96d92fed6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -1,18 +1,22 @@ package eu.dnetlib.dhp.oa.provision.utils; -import java.io.Serializable; -import java.io.StringReader; +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; import java.util.HashMap; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.jetbrains.annotations.NotNull; import org.xml.sax.SAXException; import com.google.common.base.Joiner; +import eu.dnetlib.dhp.common.api.context.*; +import 
eu.dnetlib.dhp.common.rest.DNetRestClient; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -23,6 +27,42 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; + public static ContextMapper fromAPI(final String baseURL) throws Exception { + + final ContextMapper contextMapper = new ContextMapper(); + + for (ContextSummary ctx : DNetRestClient.doGET(baseURL + "/contexts", ContextSummaryList.class)) { + + contextMapper.put(ctx.getId(), new ContextDef(ctx.getId(), ctx.getLabel(), "context", ctx.getType())); + + for (CategorySummary cat : DNetRestClient + .doGET(baseURL + "/context/" + ctx.getId(), CategorySummaryList.class)) { + contextMapper.put(cat.getId(), new ContextDef(cat.getId(), cat.getLabel(), "category", "")); + if (cat.isHasConcept()) { + for (ConceptSummary c : DNetRestClient + .doGET(baseURL + "/context/category/" + cat.getId(), ConceptSummaryList.class)) { + contextMapper.put(c.getId(), new ContextDef(c.getId(), c.getLabel(), "concept", "")); + if (c.isHasSubConcept()) { + for (ConceptSummary cs : c.getConcepts()) { + contextMapper.put(cs.getId(), new ContextDef(cs.getId(), cs.getLabel(), "concept", "")); + if (cs.isHasSubConcept()) { + for (ConceptSummary css : cs.getConcepts()) { + contextMapper + .put( + css.getId(), + new ContextDef(css.getId(), css.getLabel(), "concept", "")); + } + } + } + } + } + } + } + } + return contextMapper; + } + + @Deprecated public static ContextMapper fromIS(final String isLookupUrl) throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -32,6 +72,7 @@ public class ContextMapper extends HashMap implements Serial return 
fromXml(sb.toString()); } + @Deprecated public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper contextMapper = new ContextMapper(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 2e7b11dde..9eab960f0 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -9,6 +9,10 @@ isLookupUrl URL for the isLookup service + + contextApiBaseUrl + context API URL + relPartitions number or partitions for the relations Dataset @@ -589,7 +593,7 @@ --inputPath${workingDir}/join_entities --outputPath${workingDir}/xml - --isLookupUrl${isLookupUrl} + --contextApiBaseUrl${contextApiBaseUrl} From 6fd25cf549e3892d3d1f114848367ea00dd84399 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 08:47:12 +0100 Subject: [PATCH 39/57] code formatting --- .../eu/dnetlib/dhp/oozie/RunSQLSparkJob.java | 18 +- .../dhp/oa/dedup/DedupRecordFactory.java | 284 +++++++++--------- .../dhp/oa/dedup/SparkCreateMergeRels.java | 5 +- 3 files changed, 158 insertions(+), 149 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java index ef296bfc9..027bf0735 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java @@ -1,13 +1,7 @@ package eu.dnetlib.dhp.oozie; -import com.google.common.io.Resources; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import org.apache.commons.lang3.time.DurationFormatUtils; -import org.apache.commons.text.StringSubstitutor; -import 
org.apache.spark.SparkConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -15,7 +9,15 @@ import java.util.HashMap; import java.util.Map; import java.util.Optional; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.commons.text.StringSubstitutor; +import org.apache.spark.SparkConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Resources; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class RunSQLSparkJob { private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 4c12d1dc6..eddfba309 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,6 +1,16 @@ package eu.dnetlib.dhp.oa.dedup; +import java.util.*; +import java.util.stream.Stream; + +import org.apache.commons.beanutils.BeanUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.ReduceFunction; +import org.apache.spark.sql.*; + import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -8,180 +18,176 @@ import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.OafEntity; import 
eu.dnetlib.dhp.schema.oaf.Result; -import org.apache.commons.beanutils.BeanUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.ReduceFunction; -import org.apache.spark.sql.*; import scala.Tuple2; import scala.Tuple3; import scala.collection.JavaConversions; -import java.util.*; -import java.util.stream.Stream; - public class DedupRecordFactory { - public static final class DedupRecordReduceState { - public final String dedupId; + public static final class DedupRecordReduceState { + public final String dedupId; - public final ArrayList aliases = new ArrayList<>(); + public final ArrayList aliases = new ArrayList<>(); - public final HashSet acceptanceDate = new HashSet<>(); + public final HashSet acceptanceDate = new HashSet<>(); - public OafEntity entity; + public OafEntity entity; - public DedupRecordReduceState(String dedupId, String id, OafEntity entity) { - this.dedupId = dedupId; - this.entity = entity; - if (entity == null) { - aliases.add(id); - } else { - if (Result.class.isAssignableFrom(entity.getClass())) { - Result result = (Result) entity; - if (result.getDateofacceptance() != null && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { - acceptanceDate.add(result.getDateofacceptance().getValue()); - } - } - } - } + public DedupRecordReduceState(String dedupId, String id, OafEntity entity) { + this.dedupId = dedupId; + this.entity = entity; + if (entity == null) { + aliases.add(id); + } else { + if (Result.class.isAssignableFrom(entity.getClass())) { + Result result = (Result) entity; + if (result.getDateofacceptance() != null + && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { + acceptanceDate.add(result.getDateofacceptance().getValue()); + } + } + } + } - public String getDedupId() { - return dedupId; - } - } - private static final int MAX_ACCEPTANCE_DATE = 20; 
+ public String getDedupId() { + return dedupId; + } + } - private DedupRecordFactory() { - } + private static final int MAX_ACCEPTANCE_DATE = 20; - public static Dataset createDedupRecord( - final SparkSession spark, - final DataInfo dataInfo, - final String mergeRelsInputPath, - final String entitiesInputPath, - final Class clazz) { + private DedupRecordFactory() { + } - final long ts = System.currentTimeMillis(); - final Encoder beanEncoder = Encoders.bean(clazz); - final Encoder kryoEncoder = Encoders.kryo(clazz); + public static Dataset createDedupRecord( + final SparkSession spark, + final DataInfo dataInfo, + final String mergeRelsInputPath, + final String entitiesInputPath, + final Class clazz) { - // - Dataset entities = spark - .read() - .schema(Encoders.bean(clazz).schema()) - .json(entitiesInputPath) - .as(beanEncoder) - .map( - (MapFunction>) entity -> { - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), kryoEncoder)) - .selectExpr("_1 AS id", "_2 AS kryoObject"); + final long ts = System.currentTimeMillis(); + final Encoder beanEncoder = Encoders.bean(clazz); + final Encoder kryoEncoder = Encoders.kryo(clazz); - // : source is the dedup_id, target is the id of the mergedIn - Dataset mergeRels = spark - .read() - .load(mergeRelsInputPath) - .where("relClass == 'merges'") - .selectExpr("source as dedupId", "target as id"); + // + Dataset entities = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(entitiesInputPath) + .as(beanEncoder) + .map( + (MapFunction>) entity -> { + return new Tuple2<>(entity.getId(), entity); + }, + Encoders.tuple(Encoders.STRING(), kryoEncoder)) + .selectExpr("_1 AS id", "_2 AS kryoObject"); - return mergeRels - .join(entities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") - .select("dedupId", "id", "kryoObject") - .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) - .map((MapFunction, DedupRecordReduceState>) t -> new 
DedupRecordReduceState(t._1(), t._2(), t._3()), Encoders.kryo(DedupRecordReduceState.class)) - .groupByKey((MapFunction) DedupRecordReduceState::getDedupId, Encoders.STRING()) - .reduceGroups( - (ReduceFunction) (t1, t2) -> { - if (t1.entity == null) { - t2.aliases.addAll(t1.aliases); - return t2; - } - if (t1.acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { - t1.acceptanceDate.addAll(t2.acceptanceDate); - } - t1.aliases.addAll(t2.aliases); - t1.entity = reduceEntity(t1.entity, t2.entity); + // : source is the dedup_id, target is the id of the mergedIn + Dataset mergeRels = spark + .read() + .load(mergeRelsInputPath) + .where("relClass == 'merges'") + .selectExpr("source as dedupId", "target as id"); - return t1; - } - ) - .flatMap - ((FlatMapFunction, OafEntity>) t -> { - String dedupId = t._1(); - DedupRecordReduceState agg = t._2(); + return mergeRels + .join(entities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .select("dedupId", "id", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) + .map( + (MapFunction, DedupRecordReduceState>) t -> new DedupRecordReduceState( + t._1(), t._2(), t._3()), + Encoders.kryo(DedupRecordReduceState.class)) + .groupByKey( + (MapFunction) DedupRecordReduceState::getDedupId, Encoders.STRING()) + .reduceGroups( + (ReduceFunction) (t1, t2) -> { + if (t1.entity == null) { + t2.aliases.addAll(t1.aliases); + return t2; + } + if (t1.acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { + t1.acceptanceDate.addAll(t2.acceptanceDate); + } + t1.aliases.addAll(t2.aliases); + t1.entity = reduceEntity(t1.entity, t2.entity); - if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) { - return Collections.emptyIterator(); - } + return t1; + }) + .flatMap((FlatMapFunction, OafEntity>) t -> { + String dedupId = t._1(); + DedupRecordReduceState agg = t._2(); - return Stream.concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) - .map(id -> { - try { - OafEntity res = (OafEntity) 
BeanUtils.cloneBean(agg.entity); - res.setId(id); - res.setDataInfo(dataInfo); - res.setLastupdatetimestamp(ts); - return res; - } catch (Exception e) { - throw new RuntimeException(e); - } - }).iterator(); - }, beanEncoder); - } + if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) { + return Collections.emptyIterator(); + } - private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { + return Stream + .concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) + .map(id -> { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity); + res.setId(id); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + }) + .iterator(); + }, beanEncoder); + } + + private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { if (duplicate == null) { return entity; } + int compare = new IdentifierComparator<>() + .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); - int compare = new IdentifierComparator<>() - .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); - - if (compare > 0) { + if (compare > 0) { OafEntity swap = duplicate; - duplicate = entity; - entity = swap; - } + duplicate = entity; + entity = swap; + } - entity.mergeFrom(duplicate); + entity.mergeFrom(duplicate); - if (ModelSupport.isSubClass(duplicate, Result.class)) { - Result re = (Result) entity; - Result rd = (Result) duplicate; + if (ModelSupport.isSubClass(duplicate, Result.class)) { + Result re = (Result) entity; + Result rd = (Result) duplicate; - List> authors = new ArrayList<>(); - if (re.getAuthor() != null) { - authors.add(re.getAuthor()); - } - if (rd.getAuthor() != null) { - authors.add(rd.getAuthor()); - } + List> authors = new ArrayList<>(); + if (re.getAuthor() != null) { + authors.add(re.getAuthor()); + } + if (rd.getAuthor() != null) { + authors.add(rd.getAuthor()); + } - re.setAuthor(AuthorMerger.merge(authors)); 
- } + re.setAuthor(AuthorMerger.merge(authors)); + } - return entity; - } + return entity; + } - public static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) { - T base = entities.next()._2(); + public static T entityMerger( + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) { + T base = entities.next()._2(); - while (entities.hasNext()) { - T duplicate = entities.next()._2(); - if (duplicate != null) - base = (T) reduceEntity(base, duplicate); - } + while (entities.hasNext()) { + T duplicate = entities.next()._2(); + if (duplicate != null) + base = (T) reduceEntity(base, duplicate); + } - base.setId(id); - base.setDataInfo(dataInfo); - base.setLastupdatetimestamp(ts); + base.setId(id); + base.setDataInfo(dataInfo); + base.setLastupdatetimestamp(ts); - return base; - } + return base; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 191870d3b..59626c141 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -242,13 +242,14 @@ public class SparkCreateMergeRels extends AbstractSparkAction { // this was a pivot in a previous graph but it has been merged into a new group with different // pivot - if (!r.isNullAt(r.fieldIndex("lastUsage")) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { + if (!r.isNullAt(r.fieldIndex("lastUsage")) && !pivot.equals(id) + && !dedupId.equals(pivotDedupId)) { // materialize the previous dedup record as a merge relation with the new one res.add(new Tuple3<>(dedupId, pivotDedupId, null)); } // add merge relations - if (cut <=0 || r.getAs("position") <= cut) { + if (cut <= 0 || r. 
getAs("position") <= cut) { res.add(new Tuple3<>(id, pivotDedupId, pivot)); } From f87f3a6483d1ea18945cd8055a3b97a4973b682a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 08:54:37 +0100 Subject: [PATCH 40/57] [graph provision] updated param specification for the XML converter job --- .../dhp/oa/provision/input_params_xml_converter.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index eda6154d7..653a69ed1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -12,9 +12,9 @@ "paramRequired": true }, { - "paramName": "ilu", - "paramLongName": "isLookupUrl", - "paramDescription": "URL of the isLookUp Service", + "paramName": "cau", + "paramLongName": "contextApiBaseUrl", + "paramDescription": "URL of the context API", "paramRequired": true } ] From 3e96777cc4ce5896a0c1f1af5b5adf00546fec04 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 15:21:03 +0100 Subject: [PATCH 41/57] [collection] increased logging from the oai-pmh metadata collection process --- .../dhp/common/collection/HttpConnector2.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 905457bcd..08cc3ec59 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -8,10 +8,13 @@ import java.io.InputStream; 
import java.net.*; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.lang3.time.DateUtils; import org.apache.http.HttpHeaders; +import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,6 +101,7 @@ public class HttpConnector2 { InputStream input = null; + long start = System.currentTimeMillis(); try { if (getClientParams().getRequestDelay() > 0) { backoffAndSleep(getClientParams().getRequestDelay()); @@ -115,9 +119,8 @@ public class HttpConnector2 { urlConn.addRequestProperty(headerEntry.getKey(), headerEntry.getValue()); } } - if (log.isDebugEnabled()) { - logHeaderFields(urlConn); - } + + logHeaderFields(urlConn); int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); @@ -167,12 +170,14 @@ public class HttpConnector2 { .warn( "{} - waiting and repeating request after default delay of {} sec.", requestUrl, getClientParams().getRetryDelay()); - backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); + backoffAndSleep(retryNumber * getClientParams().getRetryDelay()); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); urlConn.disconnect(); return attemptDownload(requestUrl, retryNumber + 1, report); default: + log.error("gor error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL()); + log.error("response message: {}", urlConn.getResponseMessage()); report .put( REPORT_PREFIX + urlConn.getResponseCode(), @@ -196,16 +201,21 @@ public class HttpConnector2 { report.put(e.getClass().getName(), e.getMessage()); backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); return attemptDownload(requestUrl, retryNumber + 1, report); + } finally { + log + .info( + "request time elapsed: {}sec", + 
TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); } } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: {}", urlConn.getResponseMessage()); + log.info("StatusCode: {}", urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: {} - value: {}", e.getKey(), v); + log.info(" key: {} - value: {}", e.getKey(), v); } } } From 9b13c22e5d9a6d916f53be71400456712397ebaf Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 15:36:08 +0100 Subject: [PATCH 42/57] [graph provision] retrieve all the context information by adding all=true to the requests issued to the API --- .../eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index 96d92fed6..083dbe988 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -31,16 +31,19 @@ public class ContextMapper extends HashMap implements Serial final ContextMapper contextMapper = new ContextMapper(); - for (ContextSummary ctx : DNetRestClient.doGET(baseURL + "/contexts", ContextSummaryList.class)) { + for (ContextSummary ctx : DNetRestClient + .doGET(String.format("%s/contexts", baseURL), ContextSummaryList.class)) { contextMapper.put(ctx.getId(), new ContextDef(ctx.getId(), ctx.getLabel(), "context", ctx.getType())); for (CategorySummary cat : DNetRestClient - .doGET(baseURL + "/context/" + ctx.getId(), CategorySummaryList.class)) { + .doGET(String.format("%s/context/%s?all=true", baseURL,
ctx.getId()), CategorySummaryList.class)) { contextMapper.put(cat.getId(), new ContextDef(cat.getId(), cat.getLabel(), "category", "")); if (cat.isHasConcept()) { for (ConceptSummary c : DNetRestClient - .doGET(baseURL + "/context/category/" + cat.getId(), ConceptSummaryList.class)) { + .doGET( + String.format("%s/context/category/%s?all=true", baseURL, cat.getId()), + ConceptSummaryList.class)) { contextMapper.put(c.getId(), new ContextDef(c.getId(), c.getLabel(), "concept", "")); if (c.isHasSubConcept()) { for (ConceptSummary cs : c.getConcepts()) { From 2c1e6849f0a43c28c58907829eb6a3f060f48f2c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 24 Jan 2024 10:36:41 +0100 Subject: [PATCH 43/57] added code of conduct and contributing files --- CODE_OF_CONDUCT.md | 43 +++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 9 +++++++++ LICENSE => LICENSE.md | 0 README.md | 5 +++++ 4 files changed, 57 insertions(+) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md rename LICENSE => LICENSE.md (100%) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..aff151f94 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,43 @@ +# Contributor Code of Conduct + +Openness, transparency and our community-driven participatory approach guide us in our day-to-day interactions and decision-making. Our open source projects are no exception. Trust, respect, collaboration and transparency are core values we believe should live and breathe within our projects. Our community welcomes participants from around the world with different experiences, unique perspectives, and great ideas to share. 
+ +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Attempting collaboration before conflict +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- Violence, threats of violence, or inciting others to commit self-harm +- The use of sexualized language or imagery and unwelcome sexual attention or advances +- Trolling, intentionally spreading misinformation, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic address, without explicit permission +- Abuse of the reporting process to intentionally harass or exclude others +- Advocating for, or encouraging, any of the above behavior +- Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), [version 1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html). \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..6d83ebbcc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,9 @@ +# Contributing to D-Net Hadoop + +:+1::tada: First off, thanks for taking the time to contribute! :tada::+1: + +This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). + +The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. + +All contributions are welcome, all contributions will be considered to be contributed under the [project license](#LICENSE.md). 
diff --git a/LICENSE b/LICENSE.md similarity index 100% rename from LICENSE rename to LICENSE.md diff --git a/README.md b/README.md index 2c1440f44..b6575814d 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,11 @@ Dnet-hadoop is the project that defined all the [OOZIE workflows](https://oozie.apache.org/) for the OpenAIRE Graph construction, processing, provisioning. +This project adheres to the Contributor Covenant [code of conduct](CODE_OF_CONDUCT.md). +By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). + +This project is licensed under the [AGPL v3 or later version](#LICENSE.md). + How to build, package and run oozie workflows ==================== From 0c97a3a81a55cbdc24342d88d3862a89da1a6c5c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 24 Jan 2024 10:56:33 +0100 Subject: [PATCH 44/57] minor --- CONTRIBUTING.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6d83ebbcc..34a26f913 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,6 +4,7 @@ This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). -The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. +The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules, which apply to this project as a whole, including all its sub-modules. +Use your best judgment, and feel free to propose changes to this document in a pull request.
All contributions are welcome, all contributions will be considered to be contributed under the [project license](#LICENSE.md). From 2838a9b63086493c5d845728336438d01595f56b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 24 Jan 2024 16:07:05 +0100 Subject: [PATCH 45/57] Update 'CONTRIBUTING.md' --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 34a26f913..13a359c86 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,4 +7,4 @@ This project and everyone participating in it is governed by our [Code of Conduc The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules, which apply to this project as a whole, including all its sub-modules. Use your best judgment, and feel free to propose changes to this document in a pull request. -All contributions are welcome, all contributions will be considered to be contributed under the [project license](#LICENSE.md). +All contributions are welcome, all contributions will be considered to be contributed under the [project license](LICENSE.md). From a7115cfa9e595c7db1b41cf9a34b0ae72a08d620 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 25 Jan 2024 15:06:34 +0100 Subject: [PATCH 46/57] max mem of joins (hive.mapjoin.followby.gby.localtask.max.memory.usage) now 80%, up from 55%.
--- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index cbf97944d..f15f22320 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -78,6 +78,10 @@ hive.txn.timeout ${hive_timeout} + + hive.mapjoin.followby.gby.localtask.max.memory.usage + 0.80 + mapred.job.queue.name analytics From 9e8fc6aa88d592fc3cb354bc36894b95679b5092 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 26 Jan 2024 09:17:20 +0100 Subject: [PATCH 47/57] [collection] increased logging from the oai-pmh metadata collection process --- .../dhp/common/collection/HttpConnector2.java | 63 ++++++++++++++----- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 08cc3ec59..342d73cdc 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -97,8 +97,6 @@ public class HttpConnector2 { throw new CollectorException(msg); } - log.info("Request attempt {} [{}]", retryNumber, requestUrl); - InputStream input = null; long start = System.currentTimeMillis(); @@ -106,6 +104,9 @@ public class HttpConnector2 { if (getClientParams().getRequestDelay() > 0) { backoffAndSleep(getClientParams().getRequestDelay()); } + + log.info("Request attempt {} [{}]", retryNumber, requestUrl); + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); 
urlConn.setInstanceFollowRedirects(false); urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); @@ -135,9 +136,7 @@ public class HttpConnector2 { } if (is2xx(urlConn.getResponseCode())) { - input = urlConn.getInputStream(); - responseType = urlConn.getContentType(); - return input; + return getInputStream(urlConn, start); } if (is3xx(urlConn.getResponseCode())) { // REDIRECTS @@ -147,6 +146,7 @@ public class HttpConnector2 { .put( REPORT_PREFIX + urlConn.getResponseCode(), String.format("Moved to: %s", newUrl)); + logRequestTime(start); urlConn.disconnect(); if (retryAfter > 0) { backoffAndSleep(retryAfter); @@ -162,19 +162,39 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - "{} - waiting and repeating request after suggested retry-after {} sec.", - requestUrl, retryAfter); + "waiting and repeating request after suggested retry-after {} sec for URL {}", + retryAfter, requestUrl); backoffAndSleep(retryAfter * 1000); } else { log .warn( - "{} - waiting and repeating request after default delay of {} sec.", - requestUrl, getClientParams().getRetryDelay()); + "waiting and repeating request after default delay of {} sec for URL {}", + getClientParams().getRetryDelay(), requestUrl); backoffAndSleep(retryNumber * getClientParams().getRetryDelay()); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + + logRequestTime(start); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + case 422: // UNPROCESSABLE ENTITY + report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + log.warn("waiting and repeating request after 10 sec for URL {}", requestUrl); + backoffAndSleep(10000); + urlConn.disconnect(); + logRequestTime(start); + try { + return getInputStream(urlConn, start); + } catch (IOException e) { + log + .error( + "server returned 422 and got IOException accessing the response body from URL {}", + requestUrl); + log.error("IOException:", e); + return 
attemptDownload(requestUrl, retryNumber + 1, report); + } default: log.error("gor error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL()); log.error("response message: {}", urlConn.getResponseMessage()); @@ -184,6 +204,8 @@ public class HttpConnector2 { String .format( "%s Error: %s", requestUrl, urlConn.getResponseMessage())); + logRequestTime(start); + urlConn.disconnect(); throw new CollectorException(urlConn.getResponseCode() + " error " + report); } } @@ -201,16 +223,25 @@ public class HttpConnector2 { report.put(e.getClass().getName(), e.getMessage()); backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); return attemptDownload(requestUrl, retryNumber + 1, report); - } finally { - log - .info( - "request time elapsed: {}sec", - TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); } } + private InputStream getInputStream(HttpURLConnection urlConn, long start) throws IOException { + InputStream input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + logRequestTime(start); + return input; + } + + private static void logRequestTime(long start) { + log + .info( + "request time elapsed: {}sec", + TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); + } + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.info("StatusCode: {}", urlConn.getResponseMessage()); + log.info("Response: {} - {}", urlConn.getResponseCode(), urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { @@ -235,7 +266,7 @@ public class HttpConnector2 { for (String key : headerMap.keySet()) { if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { - return Integer.parseInt(headerMap.get(key).get(0)) + 10; + return Integer.parseInt(headerMap.get(key).get(0)); } } return -1; From 
e889808daa889530893bab370442811f4dd9dc4f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 26 Jan 2024 12:19:04 +0100 Subject: [PATCH 48/57] Fixed problem on missing author in crossref Mapping --- .../scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 6 +++--- .../eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json | 1 + .../dhp/doiboost/crossref/CrossrefMappingTest.scala | 7 +++++++ 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index ee857e2c4..64090733d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -25,7 +25,7 @@ case class mappingAffiliation(name: String) {} case class mappingAuthor( given: Option[String], - family: String, + family: Option[String], sequence: Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation] @@ -226,14 +226,14 @@ case object Crossref2Oaf { //Mapping Author val authorList: List[mappingAuthor] = - (json \ "author").extractOrElse[List[mappingAuthor]](List()) + (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined) val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") ) result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => - generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) + generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index) }.asJava) // Mapping instance diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json 
b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json new file mode 100644 index 000000000..8e75f3586 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json @@ -0,0 +1 @@ +{"indexed":{"date-parts":[[2023,12,29]],"date-time":"2023-12-29T10:40:34Z","timestamp":1703846434800},"reference-count":65,"publisher":"Springer Science and Business Media LLC","license":[{"start":{"date-parts":[[2023,2,9]],"date-time":"2023-02-09T00:00:00Z","timestamp":1675900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,9]],"date-time":"2023-02-09T00:00:00Z","timestamp":1675900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat. Phys."],"DOI":"10.1038\/s41567-022-01757-y","type":"journal-article","created":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T15:32:19Z","timestamp":1676043139000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Observation of electroweak production of two jets and a Z-boson pair"],"prefix":"10.1038","author":[{"name":"ATLAS Collaboration","sequence":"first","affiliation":[]},{"given":"G.","family":"Aad","sequence":"additional","affiliation":[]},{"given":"B.","family":"Abbott","sequence":"additional","affiliation":[]},{"given":"D. C.","family":"Abbott","sequence":"additional","affiliation":[]},{"given":"A.","family":"Abed Abud","sequence":"additional","affiliation":[]},{"given":"K.","family":"Abeling","sequence":"additional","affiliation":[]},{"given":"D. K.","family":"Abhayasinghe","sequence":"additional","affiliation":[]},{"given":"S. 
H.","family":"Abidi","sequence":"additional","affiliation":[]},{"given":"O. S.","family":"AbouZeid","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Abraham","sequence":"additional","affiliation":[]},{"given":"H.","family":"Abramowicz","sequence":"additional","affiliation":[]},{"given":"H.","family":"Abreu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Abulaiti","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Acharya","sequence":"additional","affiliation":[]},{"given":"B.","family":"Achkar","sequence":"additional","affiliation":[]},{"given":"S.","family":"Adachi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adam","sequence":"additional","affiliation":[]},{"given":"C. Adam","family":"Bourdarios","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adamczyk","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adamek","sequence":"additional","affiliation":[]},{"given":"J.","family":"Adelman","sequence":"additional","affiliation":[]},{"given":"M.","family":"Adersberger","sequence":"additional","affiliation":[]},{"given":"A.","family":"Adiguzel","sequence":"additional","affiliation":[]},{"given":"S.","family":"Adorni","sequence":"additional","affiliation":[]},{"given":"T.","family":"Adye","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Affolder","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Afik","sequence":"additional","affiliation":[]},{"given":"C.","family":"Agapopoulou","sequence":"additional","affiliation":[]},{"given":"M. N.","family":"Agaras","sequence":"additional","affiliation":[]},{"given":"A.","family":"Aggarwal","sequence":"additional","affiliation":[]},{"given":"C.","family":"Agheorghiesei","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Aguilar-Saavedra","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ahmadov","sequence":"additional","affiliation":[]},{"given":"W. 
S.","family":"Ahmed","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ai","sequence":"additional","affiliation":[]},{"given":"G.","family":"Aielli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Akatsuka","sequence":"additional","affiliation":[]},{"given":"T. P. A.","family":"\u00c5kesson","sequence":"additional","affiliation":[]},{"given":"E.","family":"Akilli","sequence":"additional","affiliation":[]},{"given":"A. V.","family":"Akimov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Al Khoury","sequence":"additional","affiliation":[]},{"given":"G. L.","family":"Alberghi","sequence":"additional","affiliation":[]},{"given":"J.","family":"Albert","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Alconada Verzini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Alderweireldt","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aleksa","sequence":"additional","affiliation":[]},{"given":"I. N.","family":"Aleksandrov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Alexa","sequence":"additional","affiliation":[]},{"given":"T.","family":"Alexopoulos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Alfonsi","sequence":"additional","affiliation":[]},{"given":"F.","family":"Alfonsi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Alhroob","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ali","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aliev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Alimonti","sequence":"additional","affiliation":[]},{"given":"C.","family":"Allaire","sequence":"additional","affiliation":[]},{"given":"B. M. M.","family":"Allbrooke","sequence":"additional","affiliation":[]},{"given":"B. W.","family":"Allen","sequence":"additional","affiliation":[]},{"given":"P. 
P.","family":"Allport","sequence":"additional","affiliation":[]},{"given":"A.","family":"Aloisio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Alonso","sequence":"additional","affiliation":[]},{"given":"C.","family":"Alpigiani","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Alshehri","sequence":"additional","affiliation":[]},{"given":"E.","family":"Alunno Camelia","sequence":"additional","affiliation":[]},{"given":"M.","family":"Alvarez Estevez","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Alviggi","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Amaral Coutinho","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ambler","sequence":"additional","affiliation":[]},{"given":"L.","family":"Ambroz","sequence":"additional","affiliation":[]},{"given":"C.","family":"Amelung","sequence":"additional","affiliation":[]},{"given":"D.","family":"Amidei","sequence":"additional","affiliation":[]},{"given":"S. P. Amor","family":"Dos Santos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Amoroso","sequence":"additional","affiliation":[]},{"given":"C. S.","family":"Amrouche","sequence":"additional","affiliation":[]},{"given":"F.","family":"An","sequence":"additional","affiliation":[]},{"given":"C.","family":"Anastopoulos","sequence":"additional","affiliation":[]},{"given":"N.","family":"Andari","sequence":"additional","affiliation":[]},{"given":"T.","family":"Andeen","sequence":"additional","affiliation":[]},{"given":"C. F.","family":"Anders","sequence":"additional","affiliation":[]},{"given":"J. K.","family":"Anders","sequence":"additional","affiliation":[]},{"given":"A.","family":"Andreazza","sequence":"additional","affiliation":[]},{"given":"V.","family":"Andrei","sequence":"additional","affiliation":[]},{"given":"C. 
R.","family":"Anelli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Angelidakis","sequence":"additional","affiliation":[]},{"given":"A.","family":"Angerami","sequence":"additional","affiliation":[]},{"given":"A. V.","family":"Anisenkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Annovi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Antel","sequence":"additional","affiliation":[]},{"given":"M. T.","family":"Anthony","sequence":"additional","affiliation":[]},{"given":"E.","family":"Antipov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Antonelli","sequence":"additional","affiliation":[]},{"given":"D. J. A.","family":"Antrim","sequence":"additional","affiliation":[]},{"given":"F.","family":"Anulli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aoki","sequence":"additional","affiliation":[]},{"given":"J. A. Aparisi","family":"Pozo","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Aparo","sequence":"additional","affiliation":[]},{"given":"L. Aperio","family":"Bella","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Araque","sequence":"additional","affiliation":[]},{"given":"V. Araujo","family":"Ferraz","sequence":"additional","affiliation":[]},{"given":"R. Araujo","family":"Pereira","sequence":"additional","affiliation":[]},{"given":"C.","family":"Arcangeletti","sequence":"additional","affiliation":[]},{"given":"A. T. H.","family":"Arce","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Arduh","sequence":"additional","affiliation":[]},{"given":"J-F.","family":"Arguin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Argyropoulos","sequence":"additional","affiliation":[]},{"given":"J.-H.","family":"Arling","sequence":"additional","affiliation":[]},{"given":"A. 
J.","family":"Armbruster","sequence":"additional","affiliation":[]},{"given":"A.","family":"Armstrong","sequence":"additional","affiliation":[]},{"given":"O.","family":"Arnaez","sequence":"additional","affiliation":[]},{"given":"H.","family":"Arnold","sequence":"additional","affiliation":[]},{"given":"Z. P. Arrubarrena","family":"Tame","sequence":"additional","affiliation":[]},{"given":"G.","family":"Artoni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Artz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Asai","sequence":"additional","affiliation":[]},{"given":"T.","family":"Asawatavonvanich","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Asbah","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Asimakopoulou","sequence":"additional","affiliation":[]},{"given":"L.","family":"Asquith","sequence":"additional","affiliation":[]},{"given":"J.","family":"Assahsah","sequence":"additional","affiliation":[]},{"given":"K.","family":"Assamagan","sequence":"additional","affiliation":[]},{"given":"R.","family":"Astalos","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Atkin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Atkinson","sequence":"additional","affiliation":[]},{"given":"N. B.","family":"Atlay","sequence":"additional","affiliation":[]},{"given":"H.","family":"Atmani","sequence":"additional","affiliation":[]},{"given":"K.","family":"Augsten","sequence":"additional","affiliation":[]},{"given":"G.","family":"Avolio","sequence":"additional","affiliation":[]},{"given":"M. 
K.","family":"Ayoub","sequence":"additional","affiliation":[]},{"given":"G.","family":"Azuelos","sequence":"additional","affiliation":[]},{"given":"H.","family":"Bachacou","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bachas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Backes","sequence":"additional","affiliation":[]},{"given":"F.","family":"Backman","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bagnaia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Bahrasemani","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bailey","sequence":"additional","affiliation":[]},{"given":"V. R.","family":"Bailey","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Baines","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bakalis","sequence":"additional","affiliation":[]},{"given":"O. K.","family":"Baker","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Bakker","sequence":"additional","affiliation":[]},{"given":"D. Bakshi","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"S.","family":"Balaji","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Baldin","sequence":"additional","affiliation":[]},{"given":"P.","family":"Balek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Balli","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Balunas","sequence":"additional","affiliation":[]},{"given":"J.","family":"Balz","sequence":"additional","affiliation":[]},{"given":"E.","family":"Banas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bandyopadhyay","sequence":"additional","affiliation":[]},{"given":"Sw.","family":"Banerjee","sequence":"additional","affiliation":[]},{"given":"A. A. E.","family":"Bannoura","sequence":"additional","affiliation":[]},{"given":"L.","family":"Barak","sequence":"additional","affiliation":[]},{"given":"W. 
M.","family":"Barbe","sequence":"additional","affiliation":[]},{"given":"E. L.","family":"Barberio","sequence":"additional","affiliation":[]},{"given":"D.","family":"Barberis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Barbero","sequence":"additional","affiliation":[]},{"given":"G.","family":"Barbour","sequence":"additional","affiliation":[]},{"given":"T.","family":"Barillari","sequence":"additional","affiliation":[]},{"given":"M-S.","family":"Barisits","sequence":"additional","affiliation":[]},{"given":"J.","family":"Barkeloo","sequence":"additional","affiliation":[]},{"given":"T.","family":"Barklow","sequence":"additional","affiliation":[]},{"given":"R.","family":"Barnea","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Barnett","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Barnett","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Barnovska-Blenessy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Baroncelli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Barone","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Barr","sequence":"additional","affiliation":[]},{"given":"L.","family":"Barranco Navarro","sequence":"additional","affiliation":[]},{"given":"F.","family":"Barreiro","sequence":"additional","affiliation":[]},{"given":"J.","family":"Barreiro Guimar\u00e3es da Costa","sequence":"additional","affiliation":[]},{"given":"S.","family":"Barsov","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bartoldus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bartolini","sequence":"additional","affiliation":[]},{"given":"A. 
E.","family":"Barton","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bartos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Basalaev","sequence":"additional","affiliation":[]},{"given":"A.","family":"Basan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bassalat","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Basso","sequence":"additional","affiliation":[]},{"given":"R. L.","family":"Bates","sequence":"additional","affiliation":[]},{"given":"S.","family":"Batlamous","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Batley","sequence":"additional","affiliation":[]},{"given":"B.","family":"Batool","sequence":"additional","affiliation":[]},{"given":"M.","family":"Battaglia","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bauce","sequence":"additional","affiliation":[]},{"given":"F.","family":"Bauer","sequence":"additional","affiliation":[]},{"given":"K. T.","family":"Bauer","sequence":"additional","affiliation":[]},{"given":"H. S.","family":"Bawa","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Beacham","sequence":"additional","affiliation":[]},{"given":"T.","family":"Beau","sequence":"additional","affiliation":[]},{"given":"P. H.","family":"Beauchemin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Becherer","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bechtle","sequence":"additional","affiliation":[]},{"given":"H. C.","family":"Beck","sequence":"additional","affiliation":[]},{"given":"H. P.","family":"Beck","sequence":"additional","affiliation":[]},{"given":"K.","family":"Becker","sequence":"additional","affiliation":[]},{"given":"C.","family":"Becot","sequence":"additional","affiliation":[]},{"given":"A.","family":"Beddall","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Beddall","sequence":"additional","affiliation":[]},{"given":"V. 
A.","family":"Bednyakov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bedognetti","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Bee","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Beermann","sequence":"additional","affiliation":[]},{"given":"M.","family":"Begalli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Begel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Behera","sequence":"additional","affiliation":[]},{"given":"J. K.","family":"Behr","sequence":"additional","affiliation":[]},{"given":"F.","family":"Beisiegel","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Bell","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bella","sequence":"additional","affiliation":[]},{"given":"L.","family":"Bellagamba","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bellerive","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bellos","sequence":"additional","affiliation":[]},{"given":"K.","family":"Beloborodov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Belotskiy","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Belyaev","sequence":"additional","affiliation":[]},{"given":"D.","family":"Benchekroun","sequence":"additional","affiliation":[]},{"given":"N.","family":"Benekos","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Benhammou","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Benjamin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Benoit","sequence":"additional","affiliation":[]},{"given":"J. 
R.","family":"Bensinger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bentvelsen","sequence":"additional","affiliation":[]},{"given":"L.","family":"Beresford","sequence":"additional","affiliation":[]},{"given":"M.","family":"Beretta","sequence":"additional","affiliation":[]},{"given":"D.","family":"Berge","sequence":"additional","affiliation":[]},{"given":"E. Bergeaas","family":"Kuutmann","sequence":"additional","affiliation":[]},{"given":"N.","family":"Berger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Bergmann","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Bergsten","sequence":"additional","affiliation":[]},{"given":"J.","family":"Beringer","sequence":"additional","affiliation":[]},{"given":"S.","family":"Berlendis","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bernardi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bernius","sequence":"additional","affiliation":[]},{"given":"F. U.","family":"Bernlochner","sequence":"additional","affiliation":[]},{"given":"T.","family":"Berry","sequence":"additional","affiliation":[]},{"given":"P.","family":"Berta","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bertella","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Bertram","sequence":"additional","affiliation":[]},{"given":"O.","family":"Bessidskaia Bylund","sequence":"additional","affiliation":[]},{"given":"N.","family":"Besson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bethani","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bethke","sequence":"additional","affiliation":[]},{"given":"A.","family":"Betti","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bevan","sequence":"additional","affiliation":[]},{"given":"J.","family":"Beyer","sequence":"additional","affiliation":[]},{"given":"D. 
S.","family":"Bhattacharya","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bhattarai","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bi","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Bianchi","sequence":"additional","affiliation":[]},{"given":"O.","family":"Biebel","sequence":"additional","affiliation":[]},{"given":"D.","family":"Biedermann","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bielski","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bierwagen","sequence":"additional","affiliation":[]},{"given":"N. V.","family":"Biesuz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Biglietti","sequence":"additional","affiliation":[]},{"given":"T. R. V.","family":"Billoud","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bindi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bingul","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Biondi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Birman","sequence":"additional","affiliation":[]},{"given":"T.","family":"Bisanz","sequence":"additional","affiliation":[]},{"given":"J. 
P.","family":"Biswal","sequence":"additional","affiliation":[]},{"given":"D.","family":"Biswas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bitadze","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bittrich","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bj\u00f8rke","sequence":"additional","affiliation":[]},{"given":"T.","family":"Blazek","sequence":"additional","affiliation":[]},{"given":"I.","family":"Bloch","sequence":"additional","affiliation":[]},{"given":"C.","family":"Blocker","sequence":"additional","affiliation":[]},{"given":"A.","family":"Blue","sequence":"additional","affiliation":[]},{"given":"U.","family":"Blumenschein","sequence":"additional","affiliation":[]},{"given":"G. J.","family":"Bobbink","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Bobrovnikov","sequence":"additional","affiliation":[]},{"given":"S. S.","family":"Bocchetta","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bocci","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bogavac","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Bogdanchikov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bohm","sequence":"additional","affiliation":[]},{"given":"V.","family":"Boisvert","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bokan","sequence":"additional","affiliation":[]},{"given":"T.","family":"Bold","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Bolz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bomben","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bona","sequence":"additional","affiliation":[]},{"given":"J. S.","family":"Bonilla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Boonekamp","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Booth","sequence":"additional","affiliation":[]},{"given":"H. 
M.","family":"Borecka-Bielska","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Borgna","sequence":"additional","affiliation":[]},{"given":"A.","family":"Borisov","sequence":"additional","affiliation":[]},{"given":"G.","family":"Borissov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Bortfeldt","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bortoletto","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boscherini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bosman","sequence":"additional","affiliation":[]},{"given":"J. D. Bossio","family":"Sola","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bouaouda","sequence":"additional","affiliation":[]},{"given":"J.","family":"Boudreau","sequence":"additional","affiliation":[]},{"given":"E. V.","family":"Bouhova-Thacker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boumediene","sequence":"additional","affiliation":[]},{"given":"S. K.","family":"Boutle","sequence":"additional","affiliation":[]},{"given":"A.","family":"Boveia","sequence":"additional","affiliation":[]},{"given":"J.","family":"Boyd","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boye","sequence":"additional","affiliation":[]},{"given":"I. R.","family":"Boyko","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bozson","sequence":"additional","affiliation":[]},{"given":"J.","family":"Bracinik","sequence":"additional","affiliation":[]},{"given":"N.","family":"Brahimi","sequence":"additional","affiliation":[]},{"given":"G.","family":"Brandt","sequence":"additional","affiliation":[]},{"given":"O.","family":"Brandt","sequence":"additional","affiliation":[]},{"given":"F.","family":"Braren","sequence":"additional","affiliation":[]},{"given":"B.","family":"Brau","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Brau","sequence":"additional","affiliation":[]},{"given":"W. D. 
Breaden","family":"Madden","sequence":"additional","affiliation":[]},{"given":"K.","family":"Brendlinger","sequence":"additional","affiliation":[]},{"given":"L.","family":"Brenner","sequence":"additional","affiliation":[]},{"given":"R.","family":"Brenner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bressler","sequence":"additional","affiliation":[]},{"given":"B.","family":"Brickwedde","sequence":"additional","affiliation":[]},{"given":"D. L.","family":"Briglin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Britton","sequence":"additional","affiliation":[]},{"given":"D.","family":"Britzger","sequence":"additional","affiliation":[]},{"given":"I.","family":"Brock","sequence":"additional","affiliation":[]},{"given":"R.","family":"Brock","sequence":"additional","affiliation":[]},{"given":"G.","family":"Brooijmans","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Brooks","sequence":"additional","affiliation":[]},{"given":"E.","family":"Brost","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Broughton","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Bruckman de Renstrom","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bruncko","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"L. 
S.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bruno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bruschi","sequence":"additional","affiliation":[]},{"given":"N.","family":"Bruscino","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bryant","sequence":"additional","affiliation":[]},{"given":"L.","family":"Bryngemark","sequence":"additional","affiliation":[]},{"given":"T.","family":"Buanes","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Buat","sequence":"additional","affiliation":[]},{"given":"P.","family":"Buchholz","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Buckley","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Budagov","sequence":"additional","affiliation":[]},{"given":"M. K.","family":"Bugge","sequence":"additional","affiliation":[]},{"given":"F.","family":"B\u00fchrer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Bulekov","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Burch","sequence":"additional","affiliation":[]},{"given":"S.","family":"Burdin","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Burgard","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Burger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Burghgrave","sequence":"additional","affiliation":[]},{"given":"J. T. P.","family":"Burr","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Burton","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Burzynski","sequence":"additional","affiliation":[]},{"given":"V.","family":"B\u00fcscher","sequence":"additional","affiliation":[]},{"given":"E.","family":"Buschmann","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Bussey","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Butler","sequence":"additional","affiliation":[]},{"given":"C. 
M.","family":"Buttar","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Butterworth","sequence":"additional","affiliation":[]},{"given":"P.","family":"Butti","sequence":"additional","affiliation":[]},{"given":"W.","family":"Buttinger","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Buxo Vazquez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Buzatu","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Buzykaev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Cabras","sequence":"additional","affiliation":[]},{"given":"S.","family":"Cabrera Urb\u00e1n","sequence":"additional","affiliation":[]},{"given":"D.","family":"Caforio","sequence":"additional","affiliation":[]},{"given":"H.","family":"Cai","sequence":"additional","affiliation":[]},{"given":"V. M. M.","family":"Cairo","sequence":"additional","affiliation":[]},{"given":"O.","family":"Cakir","sequence":"additional","affiliation":[]},{"given":"N.","family":"Calace","sequence":"additional","affiliation":[]},{"given":"P.","family":"Calafiura","sequence":"additional","affiliation":[]},{"given":"A.","family":"Calandri","sequence":"additional","affiliation":[]},{"given":"G.","family":"Calderini","sequence":"additional","affiliation":[]},{"given":"P.","family":"Calfayan","sequence":"additional","affiliation":[]},{"given":"G.","family":"Callea","sequence":"additional","affiliation":[]},{"given":"L. P.","family":"Caloba","sequence":"additional","affiliation":[]},{"given":"A.","family":"Caltabiano","sequence":"additional","affiliation":[]},{"given":"S.","family":"Calvente Lopez","sequence":"additional","affiliation":[]},{"given":"D.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"S.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"T. 
P.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"M.","family":"Calvetti","sequence":"additional","affiliation":[]},{"given":"R.","family":"Camacho Toro","sequence":"additional","affiliation":[]},{"given":"S.","family":"Camarda","sequence":"additional","affiliation":[]},{"given":"D.","family":"Camarero Munoz","sequence":"additional","affiliation":[]},{"given":"P.","family":"Camarri","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cameron","sequence":"additional","affiliation":[]},{"given":"C.","family":"Camincher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Campana","sequence":"additional","affiliation":[]},{"given":"M.","family":"Campanelli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Camplani","sequence":"additional","affiliation":[]},{"given":"A.","family":"Campoverde","sequence":"additional","affiliation":[]},{"given":"V.","family":"Canale","sequence":"additional","affiliation":[]},{"given":"A.","family":"Canesse","sequence":"additional","affiliation":[]},{"given":"M. Cano","family":"Bret","sequence":"additional","affiliation":[]},{"given":"J.","family":"Cantero","sequence":"additional","affiliation":[]},{"given":"T.","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Cao","sequence":"additional","affiliation":[]},{"given":"M. D. M.","family":"Capeans Garrido","sequence":"additional","affiliation":[]},{"given":"M.","family":"Capua","sequence":"additional","affiliation":[]},{"given":"R.","family":"Cardarelli","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cardillo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Carducci","sequence":"additional","affiliation":[]},{"given":"I.","family":"Carli","sequence":"additional","affiliation":[]},{"given":"T.","family":"Carli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Carlino","sequence":"additional","affiliation":[]},{"given":"B. 
T.","family":"Carlson","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Carlson","sequence":"additional","affiliation":[]},{"given":"L.","family":"Carminati","sequence":"additional","affiliation":[]},{"given":"R. M. D.","family":"Carney","sequence":"additional","affiliation":[]},{"given":"S.","family":"Caron","sequence":"additional","affiliation":[]},{"given":"E.","family":"Carquin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Carr\u00e1","sequence":"additional","affiliation":[]},{"given":"J. W. S.","family":"Carter","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Casado","sequence":"additional","affiliation":[]},{"given":"A. F.","family":"Casha","sequence":"additional","affiliation":[]},{"given":"R.","family":"Castelijn","sequence":"additional","affiliation":[]},{"given":"F. L.","family":"Castillo","sequence":"additional","affiliation":[]},{"given":"L.","family":"Castillo Garcia","sequence":"additional","affiliation":[]},{"given":"V.","family":"Castillo Gimenez","sequence":"additional","affiliation":[]},{"given":"N. F.","family":"Castro","sequence":"additional","affiliation":[]},{"given":"A.","family":"Catinaccio","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Catmore","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cattai","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cavaliere","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cavallaro","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cavalli-Sforza","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cavasinni","sequence":"additional","affiliation":[]},{"given":"E.","family":"Celebi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Cerda Alberich","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cerny","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Cerqueira","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cerri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Cerrito","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cerutti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cervelli","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Cetin","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Chadi","sequence":"additional","affiliation":[]},{"given":"D.","family":"Chakraborty","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"W. S.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"W. Y.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Chapman","sequence":"additional","affiliation":[]},{"given":"B.","family":"Chargeishvili","sequence":"additional","affiliation":[]},{"given":"D. G.","family":"Charlton","sequence":"additional","affiliation":[]},{"given":"T. P.","family":"Charman","sequence":"additional","affiliation":[]},{"given":"C. C.","family":"Chau","sequence":"additional","affiliation":[]},{"given":"S.","family":"Che","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chekanov","sequence":"additional","affiliation":[]},{"given":"S. V.","family":"Chekulaev","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Chelkov","sequence":"additional","affiliation":[]},{"given":"B.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"C. 
H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"X.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Y-H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"H. C.","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cheplakov","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cheremushkina","sequence":"additional","affiliation":[]},{"given":"R.","family":"Cherkaoui El Moursli","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cheu","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cheung","sequence":"additional","affiliation":[]},{"given":"T. J. A.","family":"Cheval\u00e9rias","sequence":"additional","affiliation":[]},{"given":"L.","family":"Chevalier","sequence":"additional","affiliation":[]},{"given":"V.","family":"Chiarella","sequence":"additional","affiliation":[]},{"given":"G.","family":"Chiarelli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Chiodini","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Chisholm","sequence":"additional","affiliation":[]},{"given":"A.","family":"Chitan","sequence":"additional","affiliation":[]},{"given":"I.","family":"Chiu","sequence":"additional","affiliation":[]},{"given":"Y. H.","family":"Chiu","sequence":"additional","affiliation":[]},{"given":"M. 
V.","family":"Chizhov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Choi","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Chomont","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chouridou","sequence":"additional","affiliation":[]},{"given":"E. Y. S.","family":"Chow","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Chu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Chu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chudoba","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Chwastowski","sequence":"additional","affiliation":[]},{"given":"L.","family":"Chytka","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cieri","sequence":"additional","affiliation":[]},{"given":"K. M.","family":"Ciesla","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cinca","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cindro","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Cioar\u0103","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ciocio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cirotto","sequence":"additional","affiliation":[]},{"given":"Z. H.","family":"Citron","sequence":"additional","affiliation":[]},{"given":"M.","family":"Citterio","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Ciubotaru","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Ciungu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"P. 
J.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"C.","family":"Clement","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Coadou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cobal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Coccaro","sequence":"additional","affiliation":[]},{"given":"J.","family":"Cochran","sequence":"additional","affiliation":[]},{"given":"R.","family":"Coelho Lopes De Sa","sequence":"additional","affiliation":[]},{"given":"H.","family":"Cohen","sequence":"additional","affiliation":[]},{"given":"A. E. C.","family":"Coimbra","sequence":"additional","affiliation":[]},{"given":"B.","family":"Cole","sequence":"additional","affiliation":[]},{"given":"A. P.","family":"Colijn","sequence":"additional","affiliation":[]},{"given":"J.","family":"Collot","sequence":"additional","affiliation":[]},{"given":"P. Conde","family":"Mui\u00f1o","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Connell","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Connelly","sequence":"additional","affiliation":[]},{"given":"S.","family":"Constantinescu","sequence":"additional","affiliation":[]},{"given":"F.","family":"Conventi","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Cooper-Sarkar","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cormier","sequence":"additional","affiliation":[]},{"given":"K. J. R.","family":"Cormier","sequence":"additional","affiliation":[]},{"given":"L. D.","family":"Corpe","sequence":"additional","affiliation":[]},{"given":"M.","family":"Corradi","sequence":"additional","affiliation":[]},{"given":"E. E.","family":"Corrigan","sequence":"additional","affiliation":[]},{"given":"F.","family":"Corriveau","sequence":"additional","affiliation":[]},{"given":"M. 
J.","family":"Costa","sequence":"additional","affiliation":[]},{"given":"F.","family":"Costanza","sequence":"additional","affiliation":[]},{"given":"D.","family":"Costanzo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Cowan","sequence":"additional","affiliation":[]},{"given":"J. W.","family":"Cowley","sequence":"additional","affiliation":[]},{"given":"J.","family":"Crane","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cranmer","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Crawley","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Creager","sequence":"additional","affiliation":[]},{"given":"S.","family":"Cr\u00e9p\u00e9-Renaudin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Crescioli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cristinziani","sequence":"additional","affiliation":[]},{"given":"V.","family":"Croft","sequence":"additional","affiliation":[]},{"given":"G.","family":"Crosetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cueto","sequence":"additional","affiliation":[]},{"given":"T.","family":"Cuhadar Donszelmann","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Cukierman","sequence":"additional","affiliation":[]},{"given":"W. R.","family":"Cunningham","sequence":"additional","affiliation":[]},{"given":"S.","family":"Czekierda","sequence":"additional","affiliation":[]},{"given":"P.","family":"Czodrowski","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Da Cunha Sargedas De Sousa","sequence":"additional","affiliation":[]},{"given":"J. 
V.","family":"Da Fonseca Pinto","sequence":"additional","affiliation":[]},{"given":"C.","family":"Da Via","sequence":"additional","affiliation":[]},{"given":"W.","family":"Dabrowski","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dachs","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dado","sequence":"additional","affiliation":[]},{"given":"S.","family":"Dahbi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dai","sequence":"additional","affiliation":[]},{"given":"C.","family":"Dallapiccola","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dam","sequence":"additional","affiliation":[]},{"given":"G.","family":"D\u2019amen","sequence":"additional","affiliation":[]},{"given":"V.","family":"D\u2019Amico","sequence":"additional","affiliation":[]},{"given":"J.","family":"Damp","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Dandoy","sequence":"additional","affiliation":[]},{"given":"M. F.","family":"Daneri","sequence":"additional","affiliation":[]},{"given":"N. S.","family":"Dann","sequence":"additional","affiliation":[]},{"given":"M.","family":"Danninger","sequence":"additional","affiliation":[]},{"given":"V.","family":"Dao","sequence":"additional","affiliation":[]},{"given":"G.","family":"Darbo","sequence":"additional","affiliation":[]},{"given":"O.","family":"Dartsi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dattagupta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Daubney","sequence":"additional","affiliation":[]},{"given":"S.","family":"D\u2019Auria","sequence":"additional","affiliation":[]},{"given":"C.","family":"David","sequence":"additional","affiliation":[]},{"given":"T.","family":"Davidek","sequence":"additional","affiliation":[]},{"given":"D. 
R.","family":"Davis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Dawson","sequence":"additional","affiliation":[]},{"given":"K.","family":"De","sequence":"additional","affiliation":[]},{"given":"R.","family":"De Asmundis","sequence":"additional","affiliation":[]},{"given":"M.","family":"De Beurs","sequence":"additional","affiliation":[]},{"given":"S.","family":"De Castro","sequence":"additional","affiliation":[]},{"given":"S.","family":"De Cecco","sequence":"additional","affiliation":[]},{"given":"N.","family":"De Groot","sequence":"additional","affiliation":[]},{"given":"P.","family":"de Jong","sequence":"additional","affiliation":[]},{"given":"H.","family":"De la Torre","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Maria","sequence":"additional","affiliation":[]},{"given":"D.","family":"De Pedis","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Salvo","sequence":"additional","affiliation":[]},{"given":"U.","family":"De Sanctis","sequence":"additional","affiliation":[]},{"given":"M.","family":"De Santis","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Santo","sequence":"additional","affiliation":[]},{"given":"K.","family":"De Vasconcelos Corga","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"De Vivie De Regie","sequence":"additional","affiliation":[]},{"given":"C.","family":"Debenedetti","sequence":"additional","affiliation":[]},{"given":"D. V.","family":"Dedovich","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Deiana","sequence":"additional","affiliation":[]},{"given":"J.","family":"Del Peso","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Delabat Diaz","sequence":"additional","affiliation":[]},{"given":"D.","family":"Delgove","sequence":"additional","affiliation":[]},{"given":"F.","family":"Deliot","sequence":"additional","affiliation":[]},{"given":"C. 
M.","family":"Delitzsch","sequence":"additional","affiliation":[]},{"given":"M.","family":"Della Pietra","sequence":"additional","affiliation":[]},{"given":"D.","family":"Della Volpe","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dell\u2019Acqua","sequence":"additional","affiliation":[]},{"given":"L.","family":"Dell\u2019Asta","sequence":"additional","affiliation":[]},{"given":"M.","family":"Delmastro","sequence":"additional","affiliation":[]},{"given":"C.","family":"Delporte","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Delsart","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"DeMarco","sequence":"additional","affiliation":[]},{"given":"S.","family":"Demers","sequence":"additional","affiliation":[]},{"given":"M.","family":"Demichev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Demontigny","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Denisov","sequence":"additional","affiliation":[]},{"given":"L.","family":"D\u2019Eramo","sequence":"additional","affiliation":[]},{"given":"D.","family":"Derendarz","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Derkaoui","sequence":"additional","affiliation":[]},{"given":"F.","family":"Derue","sequence":"additional","affiliation":[]},{"given":"P.","family":"Dervan","sequence":"additional","affiliation":[]},{"given":"K.","family":"Desch","sequence":"additional","affiliation":[]},{"given":"C.","family":"Deterre","sequence":"additional","affiliation":[]},{"given":"K.","family":"Dette","sequence":"additional","affiliation":[]},{"given":"C.","family":"Deutsch","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Devesa","sequence":"additional","affiliation":[]},{"given":"P. O.","family":"Deviveiros","sequence":"additional","affiliation":[]},{"given":"F. 
A.","family":"Di Bello","sequence":"additional","affiliation":[]},{"given":"A.","family":"Di Ciaccio","sequence":"additional","affiliation":[]},{"given":"L.","family":"Di Ciaccio","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Di Clemente","sequence":"additional","affiliation":[]},{"given":"C.","family":"Di Donato","sequence":"additional","affiliation":[]},{"given":"A.","family":"Di Girolamo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Di Gregorio","sequence":"additional","affiliation":[]},{"given":"B.","family":"Di Micco","sequence":"additional","affiliation":[]},{"given":"R.","family":"Di Nardo","sequence":"additional","affiliation":[]},{"given":"K. F.","family":"Di Petrillo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Di Sipio","sequence":"additional","affiliation":[]},{"given":"C.","family":"Diaconu","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Dias","sequence":"additional","affiliation":[]},{"given":"T. Dias","family":"Do Vale","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Diaz","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dickinson","sequence":"additional","affiliation":[]},{"given":"E. B.","family":"Diehl","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dietrich","sequence":"additional","affiliation":[]},{"given":"S.","family":"D\u00edez Cornell","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dimitrievska","sequence":"additional","affiliation":[]},{"given":"W.","family":"Ding","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dingfelder","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dittus","sequence":"additional","affiliation":[]},{"given":"F.","family":"Djama","sequence":"additional","affiliation":[]},{"given":"T.","family":"Djobava","sequence":"additional","affiliation":[]},{"given":"J. 
I.","family":"Djuvsland","sequence":"additional","affiliation":[]},{"given":"M. A. B.","family":"Do Vale","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dobre","sequence":"additional","affiliation":[]},{"given":"D.","family":"Dodsworth","sequence":"additional","affiliation":[]},{"given":"C.","family":"Doglioni","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dolejsi","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Dolezal","sequence":"additional","affiliation":[]},{"given":"M.","family":"Donadelli","sequence":"additional","affiliation":[]},{"given":"B.","family":"Dong","sequence":"additional","affiliation":[]},{"given":"J.","family":"Donini","sequence":"additional","affiliation":[]},{"given":"A.","family":"D\u2019onofrio","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u2019Onofrio","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dopke","sequence":"additional","affiliation":[]},{"given":"A.","family":"Doria","sequence":"additional","affiliation":[]},{"given":"M. T.","family":"Dova","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Doyle","sequence":"additional","affiliation":[]},{"given":"E.","family":"Drechsler","sequence":"additional","affiliation":[]},{"given":"E.","family":"Dreyer","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dreyer","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Drobac","sequence":"additional","affiliation":[]},{"given":"D.","family":"Du","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Duan","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dubinin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dubovsky","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dubreuil","sequence":"additional","affiliation":[]},{"given":"E.","family":"Duchovni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Duckeck","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ducourthial","sequence":"additional","affiliation":[]},{"given":"O. A.","family":"Ducu","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duda","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dudarev","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Dudder","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Duffield","sequence":"additional","affiliation":[]},{"given":"L.","family":"Duflot","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u00fchrssen","sequence":"additional","affiliation":[]},{"given":"C.","family":"D\u00fclsen","sequence":"additional","affiliation":[]},{"given":"lsen M.","family":"Dumancic","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Dumitriu","sequence":"additional","affiliation":[]},{"given":"A. K.","family":"Duncan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dunford","sequence":"additional","affiliation":[]},{"given":"A.","family":"Duperrin","sequence":"additional","affiliation":[]},{"given":"H. 
Duran","family":"Yildiz","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u00fcren","sequence":"additional","affiliation":[]},{"given":"A.","family":"Durglishvili","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duschinger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Dutta","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duvnjak","sequence":"additional","affiliation":[]},{"given":"B. L.","family":"Dwyer","sequence":"additional","affiliation":[]},{"given":"G. I.","family":"Dyckes","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dyndal","sequence":"additional","affiliation":[]},{"given":"S.","family":"Dysch","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Dziedzic","sequence":"additional","affiliation":[]},{"given":"K. M.","family":"Ecker","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Eggleston","sequence":"additional","affiliation":[]},{"given":"T.","family":"Eifert","sequence":"additional","affiliation":[]},{"given":"G.","family":"Eigen","sequence":"additional","affiliation":[]},{"given":"K.","family":"Einsweiler","sequence":"additional","affiliation":[]},{"given":"T.","family":"Ekelof","sequence":"additional","affiliation":[]},{"given":"H.","family":"El Jarrari","sequence":"additional","affiliation":[]},{"given":"R.","family":"El Kosseifi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Ellajosyula","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ellert","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ellinghaus","sequence":"additional","affiliation":[]},{"given":"A. 
A.","family":"Elliot","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ellis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Elmsheuser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Elsing","sequence":"additional","affiliation":[]},{"given":"D.","family":"Emeliyanov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Emerman","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Enari","sequence":"additional","affiliation":[]},{"given":"M. B.","family":"Epland","sequence":"additional","affiliation":[]},{"given":"J.","family":"Erdmann","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ereditato","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Erland","sequence":"additional","affiliation":[]},{"given":"M.","family":"Errenst","sequence":"additional","affiliation":[]},{"given":"M.","family":"Escalier","sequence":"additional","affiliation":[]},{"given":"C.","family":"Escobar","sequence":"additional","affiliation":[]},{"given":"O.","family":"Estrada Pastor","sequence":"additional","affiliation":[]},{"given":"E.","family":"Etzion","sequence":"additional","affiliation":[]},{"given":"H.","family":"Evans","sequence":"additional","affiliation":[]},{"given":"M. O.","family":"Evans","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ezhilov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fabbri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fabbri","sequence":"additional","affiliation":[]},{"given":"V.","family":"Fabiani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Facini","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Faisca Rodrigues Pereira","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Fakhrutdinov","sequence":"additional","affiliation":[]},{"given":"S.","family":"Falciano","sequence":"additional","affiliation":[]},{"given":"P. 
J.","family":"Falke","sequence":"additional","affiliation":[]},{"given":"S.","family":"Falke","sequence":"additional","affiliation":[]},{"given":"J.","family":"Faltova","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Fang","sequence":"additional","affiliation":[]},{"given":"G.","family":"Fanourakis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Fanti","sequence":"additional","affiliation":[]},{"given":"M.","family":"Faraj","sequence":"additional","affiliation":[]},{"given":"A.","family":"Farbin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Farilla","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Farina","sequence":"additional","affiliation":[]},{"given":"T.","family":"Farooque","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Farrington","sequence":"additional","affiliation":[]},{"given":"P.","family":"Farthouat","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fassi","sequence":"additional","affiliation":[]},{"given":"P.","family":"Fassnacht","sequence":"additional","affiliation":[]},{"given":"D.","family":"Fassouliotis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Faucci Giannelli","sequence":"additional","affiliation":[]},{"given":"W. J.","family":"Fawcett","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fayard","sequence":"additional","affiliation":[]},{"given":"O. L.","family":"Fedin","sequence":"additional","affiliation":[]},{"given":"W.","family":"Fedorko","sequence":"additional","affiliation":[]},{"given":"M.","family":"Feickert","sequence":"additional","affiliation":[]},{"given":"L.","family":"Feligioni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Fell","sequence":"additional","affiliation":[]},{"given":"C.","family":"Feng","sequence":"additional","affiliation":[]},{"given":"M.","family":"Feng","sequence":"additional","affiliation":[]},{"given":"M. 
J.","family":"Fenton","sequence":"additional","affiliation":[]},{"given":"A. B.","family":"Fenyuk","sequence":"additional","affiliation":[]},{"given":"S. W.","family":"Ferguson","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ferrando","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrante","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"P.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"D. E.","family":"Ferreira de Lima","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrer","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ferrere","sequence":"additional","affiliation":[]},{"given":"C.","family":"Ferretti","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fiedler","sequence":"additional","affiliation":[]},{"given":"A.","family":"Filip\u010di\u010d","sequence":"additional","affiliation":[]},{"given":"F.","family":"Filthaut","sequence":"additional","affiliation":[]},{"given":"K. D.","family":"Finelli","sequence":"additional","affiliation":[]},{"given":"M. C. N.","family":"Fiolhais","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fiorini","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fischer","sequence":"additional","affiliation":[]},{"given":"W. C.","family":"Fisher","sequence":"additional","affiliation":[]},{"given":"I.","family":"Fleck","sequence":"additional","affiliation":[]},{"given":"P.","family":"Fleischmann","sequence":"additional","affiliation":[]},{"given":"T.","family":"Flick","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Flierl","sequence":"additional","affiliation":[]},{"given":"L.","family":"Flores","sequence":"additional","affiliation":[]},{"given":"L. 
R.","family":"Flores Castillo","sequence":"additional","affiliation":[]},{"given":"F. M.","family":"Follega","sequence":"additional","affiliation":[]},{"given":"N.","family":"Fomin","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Foo","sequence":"additional","affiliation":[]},{"given":"G. T.","family":"Forcolin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Formica","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"F\u00f6rster","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Forti","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Foster","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Foti","sequence":"additional","affiliation":[]},{"given":"D.","family":"Fournier","sequence":"additional","affiliation":[]},{"given":"H.","family":"Fox","sequence":"additional","affiliation":[]},{"given":"P.","family":"Francavilla","sequence":"additional","affiliation":[]},{"given":"S.","family":"Francescato","sequence":"additional","affiliation":[]},{"given":"M.","family":"Franchini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Franchino","sequence":"additional","affiliation":[]},{"given":"D.","family":"Francis","sequence":"additional","affiliation":[]},{"given":"L.","family":"Franconi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Franklin","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Fray","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Freeman","sequence":"additional","affiliation":[]},{"given":"B.","family":"Freund","sequence":"additional","affiliation":[]},{"given":"W. S.","family":"Freund","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Freundlich","sequence":"additional","affiliation":[]},{"given":"D. 
C.","family":"Frizzell","sequence":"additional","affiliation":[]},{"given":"D.","family":"Froidevaux","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Frost","sequence":"additional","affiliation":[]},{"given":"C.","family":"Fukunaga","sequence":"additional","affiliation":[]},{"given":"E.","family":"Fullana Torregrosa","sequence":"additional","affiliation":[]},{"given":"T.","family":"Fusayasu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Fuster","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gabrielli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gabrielli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gadatsch","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gadow","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gagliardi","sequence":"additional","affiliation":[]},{"given":"L. G.","family":"Gagnon","sequence":"additional","affiliation":[]},{"given":"B.","family":"Galhardo","sequence":"additional","affiliation":[]},{"given":"G. E.","family":"Gallardo","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Gallas","sequence":"additional","affiliation":[]},{"given":"B. J.","family":"Gallop","sequence":"additional","affiliation":[]},{"given":"G.","family":"Galster","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gamboa Goni","sequence":"additional","affiliation":[]},{"given":"K. K.","family":"Gan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ganguly","sequence":"additional","affiliation":[]},{"given":"J.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Y. S.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"C.","family":"Garc\u00eda","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Garc\u00eda Navarro","sequence":"additional","affiliation":[]},{"given":"J. 
A.","family":"Garc\u00eda Pascual","sequence":"additional","affiliation":[]},{"given":"C.","family":"Garcia-Argos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Garcia-Sciveres","sequence":"additional","affiliation":[]},{"given":"R. W.","family":"Gardner","sequence":"additional","affiliation":[]},{"given":"N.","family":"Garelli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gargiulo","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Garner","sequence":"additional","affiliation":[]},{"given":"V.","family":"Garonne","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Gasiorowski","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gaspar","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gaudiello","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gaudio","sequence":"additional","affiliation":[]},{"given":"I. L.","family":"Gavrilenko","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gavrilyuk","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gay","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gaycken","sequence":"additional","affiliation":[]},{"given":"E. N.","family":"Gazis","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Geanta","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Gee","sequence":"additional","affiliation":[]},{"given":"C. N. P.","family":"Gee","sequence":"additional","affiliation":[]},{"given":"J.","family":"Geisen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Geisen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gemme","sequence":"additional","affiliation":[]},{"given":"M. 
H.","family":"Genest","sequence":"additional","affiliation":[]},{"given":"C.","family":"Geng","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gentile","sequence":"additional","affiliation":[]},{"given":"S.","family":"George","sequence":"additional","affiliation":[]},{"given":"T.","family":"Geralis","sequence":"additional","affiliation":[]},{"given":"L. O.","family":"Gerlach","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gessinger-Befurt","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gessner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ghasemi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ghasemi Bostanabad","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ghneimat","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ghosh","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ghosh","sequence":"additional","affiliation":[]},{"given":"B.","family":"Giacobbe","sequence":"additional","affiliation":[]},{"given":"S.","family":"Giagu","sequence":"additional","affiliation":[]},{"given":"N.","family":"Giangiacomi","sequence":"additional","affiliation":[]},{"given":"P.","family":"Giannetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Giannini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Giannini","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Gibson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Gignac","sequence":"additional","affiliation":[]},{"given":"D.","family":"Gillberg","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gilles","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Gingrich","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Giordani","sequence":"additional","affiliation":[]},{"given":"P. 
F.","family":"Giraud","sequence":"additional","affiliation":[]},{"given":"G.","family":"Giugliarelli","sequence":"additional","affiliation":[]},{"given":"D.","family":"Giugni","sequence":"additional","affiliation":[]},{"given":"F.","family":"Giuli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gkaitatzis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Gkialas","sequence":"additional","affiliation":[]},{"given":"E. L.","family":"Gkougkousis","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gkountoumis","sequence":"additional","affiliation":[]},{"given":"L. K.","family":"Gladilin","sequence":"additional","affiliation":[]},{"given":"C.","family":"Glasman","sequence":"additional","affiliation":[]},{"given":"J.","family":"Glatzer","sequence":"additional","affiliation":[]},{"given":"P. C. F.","family":"Glaysher","sequence":"additional","affiliation":[]},{"given":"A.","family":"Glazov","sequence":"additional","affiliation":[]},{"given":"G. 
R.","family":"Gledhill","sequence":"additional","affiliation":[]},{"given":"I.","family":"Gnesi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Goblirsch-Kolb","sequence":"additional","affiliation":[]},{"given":"D.","family":"Godin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Goldfarb","sequence":"additional","affiliation":[]},{"given":"T.","family":"Golling","sequence":"additional","affiliation":[]},{"given":"D.","family":"Golubkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gomes","sequence":"additional","affiliation":[]},{"given":"R.","family":"Goncalves Gama","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gon\u00e7alo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gonella","sequence":"additional","affiliation":[]},{"given":"L.","family":"Gonella","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gongadze","sequence":"additional","affiliation":[]},{"given":"F.","family":"Gonnella","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Gonski","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonz\u00e1lez de la Hoz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonzalez Fernandez","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonzalez-Sevilla","sequence":"additional","affiliation":[]},{"given":"G. R.","family":"Gonzalvo Rodriguez","sequence":"additional","affiliation":[]},{"given":"L.","family":"Goossens","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Gorasia","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Gorbounov","sequence":"additional","affiliation":[]},{"given":"H. 
A.","family":"Gordon","sequence":"additional","affiliation":[]},{"given":"B.","family":"Gorini","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gorini","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gori\u0161ek","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Goshaw","sequence":"additional","affiliation":[]},{"given":"M. I.","family":"Gostkin","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Gottardo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Gouighri","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Goussiou","sequence":"additional","affiliation":[]},{"given":"N.","family":"Govender","sequence":"additional","affiliation":[]},{"given":"C.","family":"Goy","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gozani","sequence":"additional","affiliation":[]},{"given":"I.","family":"Grabowska-Bold","sequence":"additional","affiliation":[]},{"given":"E. C.","family":"Graham","sequence":"additional","affiliation":[]},{"given":"J.","family":"Gramling","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gramstad","sequence":"additional","affiliation":[]},{"given":"S.","family":"Grancagnolo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Grandi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Gratchev","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Gravila","sequence":"additional","affiliation":[]},{"given":"F. G.","family":"Gravili","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gray","sequence":"additional","affiliation":[]},{"given":"H. M.","family":"Gray","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grefe","sequence":"additional","affiliation":[]},{"given":"K.","family":"Gregersen","sequence":"additional","affiliation":[]},{"given":"I. 
M.","family":"Gregor","sequence":"additional","affiliation":[]},{"given":"P.","family":"Grenier","sequence":"additional","affiliation":[]},{"given":"K.","family":"Grevtsov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grieco","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Grieser","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Grillo","sequence":"additional","affiliation":[]},{"given":"K.","family":"Grimm","sequence":"additional","affiliation":[]},{"given":"S.","family":"Grinstein","sequence":"additional","affiliation":[]},{"given":"J.-F.","family":"Grivaz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Groh","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gross","sequence":"additional","affiliation":[]},{"given":"J.","family":"Grosse-Knetter","sequence":"additional","affiliation":[]},{"given":"Z. J.","family":"Grout","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grud","sequence":"additional","affiliation":[]},{"given":"A.","family":"Grummer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Guan","sequence":"additional","affiliation":[]},{"given":"W.","family":"Guan","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gubbels","sequence":"additional","affiliation":[]},{"given":"J.","family":"Guenther","sequence":"additional","affiliation":[]},{"given":"A.","family":"Guerguichon","sequence":"additional","affiliation":[]},{"given":"J. G. 
R.","family":"Guerrero Rojas","sequence":"additional","affiliation":[]},{"given":"F.","family":"Guescini","sequence":"additional","affiliation":[]},{"given":"D.","family":"Guest","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gugel","sequence":"additional","affiliation":[]},{"given":"T.","family":"Guillemin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Guindon","sequence":"additional","affiliation":[]},{"given":"U.","family":"Gul","sequence":"additional","affiliation":[]},{"given":"J.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"W.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gurbuz","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gustavino","sequence":"additional","affiliation":[]},{"given":"M.","family":"Guth","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gutierrez","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gutschow","sequence":"additional","affiliation":[]},{"given":"C.","family":"Guyot","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gwenlan","sequence":"additional","affiliation":[]},{"given":"C. B.","family":"Gwilliam","sequence":"additional","affiliation":[]},{"given":"A.","family":"Haas","sequence":"additional","affiliation":[]},{"given":"C.","family":"Haber","sequence":"additional","affiliation":[]},{"given":"H. 
K.","family":"Hadavand","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hadef","sequence":"additional","affiliation":[]},{"given":"M.","family":"Haleem","sequence":"additional","affiliation":[]},{"given":"J.","family":"Haley","sequence":"additional","affiliation":[]},{"given":"G.","family":"Halladjian","sequence":"additional","affiliation":[]},{"given":"G. D.","family":"Hallewell","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hamacher","sequence":"additional","affiliation":[]},{"given":"P.","family":"Hamal","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hamano","sequence":"additional","affiliation":[]},{"given":"H.","family":"Hamdaoui","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hamer","sequence":"additional","affiliation":[]},{"given":"G. N.","family":"Hamity","sequence":"additional","affiliation":[]},{"given":"K.","family":"Han","sequence":"additional","affiliation":[]},{"given":"L.","family":"Han","sequence":"additional","affiliation":[]},{"given":"S.","family":"Han","sequence":"additional","affiliation":[]},{"given":"Y. F.","family":"Han","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hanagaki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hance","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Handl","sequence":"additional","affiliation":[]},{"given":"B.","family":"Haney","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hankache","sequence":"additional","affiliation":[]},{"given":"E.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"P. H.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"E. 
C.","family":"Hanson","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hara","sequence":"additional","affiliation":[]},{"given":"T.","family":"Harenberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Harkusha","sequence":"additional","affiliation":[]},{"given":"P. F.","family":"Harrison","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"Hartmann","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Hasegawa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hasib","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hassani","sequence":"additional","affiliation":[]},{"given":"S.","family":"Haug","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hauser","sequence":"additional","affiliation":[]},{"given":"L. B.","family":"Havener","sequence":"additional","affiliation":[]},{"given":"M.","family":"Havranek","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Hawkes","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Hawkings","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hayden","sequence":"additional","affiliation":[]},{"given":"C.","family":"Hayes","sequence":"additional","affiliation":[]},{"given":"R. L.","family":"Hayes","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Hays","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Hays","sequence":"additional","affiliation":[]},{"given":"H. S.","family":"Hayward","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Haywood","sequence":"additional","affiliation":[]},{"given":"F.","family":"He","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Heath","sequence":"additional","affiliation":[]},{"given":"V.","family":"Hedberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Heer","sequence":"additional","affiliation":[]},{"given":"K. 
K.","family":"Heidegger","sequence":"additional","affiliation":[]},{"given":"W. D.","family":"Heidorn","sequence":"additional","affiliation":[]},{"given":"J.","family":"Heilman","sequence":"additional","affiliation":[]},{"given":"S.","family":"Heim","sequence":"additional","affiliation":[]},{"given":"T.","family":"Heim","sequence":"additional","affiliation":[]},{"given":"B.","family":"Heinemann","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Heinrich","sequence":"additional","affiliation":[]},{"given":"L.","family":"Heinrich","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hejbal","sequence":"additional","affiliation":[]},{"given":"L.","family":"Helary","sequence":"additional","affiliation":[]},{"given":"A.","family":"Held","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hellesund","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Helling","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hellman","sequence":"additional","affiliation":[]},{"given":"C.","family":"Helsens","sequence":"additional","affiliation":[]},{"given":"R. C. W.","family":"Henderson","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Heng","sequence":"additional","affiliation":[]},{"given":"L.","family":"Henkelmann","sequence":"additional","affiliation":[]},{"given":"S.","family":"Henkelmann","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Henriques Correia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Herde","sequence":"additional","affiliation":[]},{"given":"V.","family":"Herget","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Hern\u00e1ndez Jim\u00e9nez","sequence":"additional","affiliation":[]},{"given":"H.","family":"Herr","sequence":"additional","affiliation":[]},{"given":"M. 
G.","family":"Herrmann","sequence":"additional","affiliation":[]},{"given":"T.","family":"Herrmann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Herten","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hertenberger","sequence":"additional","affiliation":[]},{"given":"L.","family":"Hervas","sequence":"additional","affiliation":[]},{"given":"T. C.","family":"Herwig","sequence":"additional","affiliation":[]},{"given":"G. G.","family":"Hesketh","sequence":"additional","affiliation":[]},{"given":"N. P.","family":"Hessey","sequence":"additional","affiliation":[]},{"given":"A.","family":"Higashida","sequence":"additional","affiliation":[]},{"given":"S.","family":"Higashino","sequence":"additional","affiliation":[]},{"given":"E.","family":"Hig\u00f3n-Rodriguez","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hildebrand","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Hill","sequence":"additional","affiliation":[]},{"given":"K. K.","family":"Hill","sequence":"additional","affiliation":[]},{"given":"K. H.","family":"Hiller","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Hillier","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hils","sequence":"additional","affiliation":[]},{"given":"I.","family":"Hinchliffe","sequence":"additional","affiliation":[]},{"given":"F.","family":"Hinterkeuser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hirose","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hirose","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hirschbuehl","sequence":"additional","affiliation":[]},{"given":"B.","family":"Hiti","sequence":"additional","affiliation":[]},{"given":"O.","family":"Hladik","sequence":"additional","affiliation":[]},{"given":"D. 
R.","family":"Hlaluku","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hobbs","sequence":"additional","affiliation":[]},{"given":"N.","family":"Hod","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Hodgkinson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hoecker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hohn","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hohov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Holm","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Holmes","sequence":"additional","affiliation":[]},{"given":"M.","family":"Holzbock","sequence":"additional","affiliation":[]},{"given":"L. B. A. H.","family":"Hommels","sequence":"additional","affiliation":[]},{"given":"S.","family":"Honda","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Hong","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Honig","sequence":"additional","affiliation":[]},{"given":"A.","family":"H\u00f6nle","sequence":"additional","affiliation":[]},{"given":"B. H.","family":"Hooberman","sequence":"additional","affiliation":[]},{"given":"W. H.","family":"Hopkins","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Horii","sequence":"additional","affiliation":[]},{"given":"P.","family":"Horn","sequence":"additional","affiliation":[]},{"given":"L. 
A.","family":"Horyn","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hoummada","sequence":"additional","affiliation":[]},{"given":"J.","family":"Howarth","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hoya","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hrabovsky","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hrdinka","sequence":"additional","affiliation":[]},{"given":"I.","family":"Hristova","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hrivnac","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hrynevich","sequence":"additional","affiliation":[]},{"given":"T.","family":"Hryn\u2019ova","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Hsu","sequence":"additional","affiliation":[]},{"given":"S.-C.","family":"Hsu","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Y. F.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Hubacek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Hubaut","sequence":"additional","affiliation":[]},{"given":"M.","family":"Huebner","sequence":"additional","affiliation":[]},{"given":"F.","family":"Huegging","sequence":"additional","affiliation":[]},{"given":"T. B.","family":"Huffman","sequence":"additional","affiliation":[]},{"given":"M.","family":"Huhtinen","sequence":"additional","affiliation":[]},{"given":"R. F. 
H.","family":"Hunter","sequence":"additional","affiliation":[]},{"given":"P.","family":"Huo","sequence":"additional","affiliation":[]},{"given":"N.","family":"Huseynov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Huston","sequence":"additional","affiliation":[]},{"given":"J.","family":"Huth","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hyneman","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hyrych","sequence":"additional","affiliation":[]},{"given":"G.","family":"Iacobucci","sequence":"additional","affiliation":[]},{"given":"G.","family":"Iakovidis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ibragimov","sequence":"additional","affiliation":[]},{"given":"L.","family":"Iconomidou-Fayard","sequence":"additional","affiliation":[]},{"given":"P.","family":"Iengo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ignazzi","sequence":"additional","affiliation":[]},{"given":"O.","family":"Igonkina","sequence":"additional","affiliation":[]},{"given":"R.","family":"Iguchi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Iizawa","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ikegami","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ikeno","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ilg","sequence":"additional","affiliation":[]},{"given":"D.","family":"Iliadis","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ilic","sequence":"additional","affiliation":[]},{"given":"F.","family":"Iltzsche","sequence":"additional","affiliation":[]},{"given":"G.","family":"Introzzi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Iodice","sequence":"additional","affiliation":[]},{"given":"K.","family":"Iordanidou","sequence":"additional","affiliation":[]},{"given":"V.","family":"Ippolito","sequence":"additional","affiliation":[]},{"given":"M. 
F.","family":"Isacson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ishino","sequence":"additional","affiliation":[]},{"given":"W.","family":"Islam","sequence":"additional","affiliation":[]},{"given":"C.","family":"Issever","sequence":"additional","affiliation":[]},{"given":"S.","family":"Istin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ito","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Iturbe Ponce","sequence":"additional","affiliation":[]},{"given":"R.","family":"Iuppa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ivina","sequence":"additional","affiliation":[]},{"given":"H.","family":"Iwasaki","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Izen","sequence":"additional","affiliation":[]},{"given":"V.","family":"Izzo","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jacka","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jackson","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Jacobs","sequence":"additional","affiliation":[]},{"given":"B. P.","family":"Jaeger","sequence":"additional","affiliation":[]},{"given":"V.","family":"Jain","sequence":"additional","affiliation":[]},{"given":"G.","family":"J\u00e4kel","sequence":"additional","affiliation":[]},{"given":"K. B.","family":"Jakobi","sequence":"additional","affiliation":[]},{"given":"K.","family":"Jakobs","sequence":"additional","affiliation":[]},{"given":"T.","family":"Jakoubek","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jamieson","sequence":"additional","affiliation":[]},{"given":"K. W.","family":"Janas","sequence":"additional","affiliation":[]},{"given":"R.","family":"Jansky","sequence":"additional","affiliation":[]},{"given":"M.","family":"Janus","sequence":"additional","affiliation":[]},{"given":"P. 
A.","family":"Janus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Jarlskog","sequence":"additional","affiliation":[]},{"given":"N.","family":"Javadov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Jav\u016frek","sequence":"additional","affiliation":[]},{"given":"M.","family":"Javurkova","sequence":"additional","affiliation":[]},{"given":"F.","family":"Jeanneau","sequence":"additional","affiliation":[]},{"given":"L.","family":"Jeanty","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jejelava","sequence":"additional","affiliation":[]},{"given":"A.","family":"Jelinskas","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jenni","sequence":"additional","affiliation":[]},{"given":"N.","family":"Jeong","sequence":"additional","affiliation":[]},{"given":"S.","family":"J\u00e9z\u00e9quel","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ji","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jiggins","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Jimenez Morales","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jimenez Pena","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Jinaru","sequence":"additional","affiliation":[]},{"given":"O.","family":"Jinnouchi","sequence":"additional","affiliation":[]},{"given":"H.","family":"Jivan","sequence":"additional","affiliation":[]},{"given":"P.","family":"Johansson","sequence":"additional","affiliation":[]},{"given":"K. A.","family":"Johns","sequence":"additional","affiliation":[]},{"given":"C. 
A.","family":"Johnson","sequence":"additional","affiliation":[]},{"given":"R. W. L.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jongmanns","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Jorge","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jovicevic","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ju","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Junggeburth","sequence":"additional","affiliation":[]},{"given":"A.","family":"Juste Rozas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kaczmarska","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kado","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kagan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kagan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kahn","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kahra","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kaji","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kajomovitz","sequence":"additional","affiliation":[]},{"given":"C. W.","family":"Kalderon","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kaluza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kamenshchikov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kaneda","sequence":"additional","affiliation":[]},{"given":"N. 
J.","family":"Kang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Kanjir","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Kano","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kanzaki","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Kaplan","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kar","sequence":"additional","affiliation":[]},{"given":"K.","family":"Karava","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Kareem","sequence":"additional","affiliation":[]},{"given":"S. N.","family":"Karpov","sequence":"additional","affiliation":[]},{"given":"Z. M.","family":"Karpova","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kartvelishvili","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Karyukhin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kastanas","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kato","sequence":"additional","affiliation":[]},{"given":"J.","family":"Katzy","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kawade","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kawagoe","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kawaguchi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kawamoto","sequence":"additional","affiliation":[]},{"given":"G.","family":"Kawamura","sequence":"additional","affiliation":[]},{"given":"E. F.","family":"Kay","sequence":"additional","affiliation":[]},{"given":"V. F.","family":"Kazanin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Keeler","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kehoe","sequence":"additional","affiliation":[]},{"given":"J. 
S.","family":"Keller","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kellermann","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kelsey","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Kempster","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kendrick","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Kennedy","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kepka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kersten","sequence":"additional","affiliation":[]},{"given":"B. P.","family":"Ker\u0161evan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ketabchi Haghighat","sequence":"additional","affiliation":[]},{"given":"M.","family":"Khader","sequence":"additional","affiliation":[]},{"given":"F.","family":"Khalil-Zada","sequence":"additional","affiliation":[]},{"given":"M.","family":"Khandoga","sequence":"additional","affiliation":[]},{"given":"A.","family":"Khanov","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Kharlamov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kharlamova","sequence":"additional","affiliation":[]},{"given":"E. E.","family":"Khoda","sequence":"additional","affiliation":[]},{"given":"A.","family":"Khodinov","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Khoo","sequence":"additional","affiliation":[]},{"given":"E.","family":"Khramov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Khubua","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kido","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kiehn","sequence":"additional","affiliation":[]},{"given":"C. R.","family":"Kilby","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Y. 
K.","family":"Kim","sequence":"additional","affiliation":[]},{"given":"N.","family":"Kimura","sequence":"additional","affiliation":[]},{"given":"O. M.","family":"Kind","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"King","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kirchmeier","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kirk","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Kiryunin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kishimoto","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Kisliuk","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kitali","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kivernyk","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klapdor-Kleingrothaus","sequence":"additional","affiliation":[]},{"given":"M.","family":"Klassen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"M. H.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"M.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"U.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kleinknecht","sequence":"additional","affiliation":[]},{"given":"P.","family":"Klimek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Klimentov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klingl","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klioutchnikova","sequence":"additional","affiliation":[]},{"given":"F. F.","family":"Klitzner","sequence":"additional","affiliation":[]},{"given":"P.","family":"Kluit","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kluth","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kneringer","sequence":"additional","affiliation":[]},{"given":"E. B. F. 
G.","family":"Knoops","sequence":"additional","affiliation":[]},{"given":"A.","family":"Knue","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kobayashi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kobayashi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kobel","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kocian","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kodama","sequence":"additional","affiliation":[]},{"given":"P.","family":"Kodys","sequence":"additional","affiliation":[]},{"given":"P. T.","family":"Koenig","sequence":"additional","affiliation":[]},{"given":"T.","family":"Koffas","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"K\u00f6hler","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kolb","sequence":"additional","affiliation":[]},{"given":"I.","family":"Koletsou","sequence":"additional","affiliation":[]},{"given":"T.","family":"Komarek","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kondo","sequence":"additional","affiliation":[]},{"given":"K.","family":"K\u00f6neke","sequence":"additional","affiliation":[]},{"given":"A. X. Y.","family":"Kong","sequence":"additional","affiliation":[]},{"given":"A. 
C.","family":"K\u00f6nig","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kono","sequence":"additional","affiliation":[]},{"given":"V.","family":"Konstantinides","sequence":"additional","affiliation":[]},{"given":"N.","family":"Konstantinidis","sequence":"additional","affiliation":[]},{"given":"B.","family":"Konya","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kopeliansky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Koperny","sequence":"additional","affiliation":[]},{"given":"K.","family":"Korcyl","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kordas","sequence":"additional","affiliation":[]},{"given":"G.","family":"Koren","sequence":"additional","affiliation":[]},{"given":"A.","family":"Korn","sequence":"additional","affiliation":[]},{"given":"I.","family":"Korolkov","sequence":"additional","affiliation":[]},{"given":"E. V.","family":"Korolkova","sequence":"additional","affiliation":[]},{"given":"N.","family":"Korotkova","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kortner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kortner","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kosek","sequence":"additional","affiliation":[]},{"given":"V. V.","family":"Kostyukhin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kotsokechagia","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kotwal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Koulouris","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kourkoumeli-Charalampidi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kourkoumelis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kourlitis","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kouskoura","sequence":"additional","affiliation":[]},{"given":"A. 
B.","family":"Kowalewska","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kowalewski","sequence":"additional","affiliation":[]},{"given":"W.","family":"Kozanecki","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Kozhin","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Kramarenko","sequence":"additional","affiliation":[]},{"given":"G.","family":"Kramberger","sequence":"additional","affiliation":[]},{"given":"D.","family":"Krasnopevtsev","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"Krasny","sequence":"additional","affiliation":[]},{"given":"A.","family":"Krasznahorkay","sequence":"additional","affiliation":[]},{"given":"D.","family":"Krauss","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Kremer","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kretzschmar","sequence":"additional","affiliation":[]},{"given":"P.","family":"Krieger","sequence":"additional","affiliation":[]},{"given":"F.","family":"Krieter","sequence":"additional","affiliation":[]},{"given":"A.","family":"Krishnan","sequence":"additional","affiliation":[]},{"given":"K.","family":"Krizka","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kroeninger","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kroha","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kroll","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kroll","sequence":"additional","affiliation":[]},{"given":"K. S.","family":"Krowpman","sequence":"additional","affiliation":[]},{"given":"U.","family":"Kruchonak","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kr\u00fcger","sequence":"additional","affiliation":[]},{"given":"N.","family":"Krumnack","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Kruse","sequence":"additional","affiliation":[]},{"given":"J. 
A.","family":"Krzysiak","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kubota","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kuchinskaia","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuday","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kuechler","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Kuechler","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuehn","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kugel","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kuhl","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kukhtin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kukla","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Kulchitsky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuleshov","sequence":"additional","affiliation":[]},{"given":"Y. P.","family":"Kulinich","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kuna","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kunigo","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kupco","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kupfer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kuprash","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kurashige","sequence":"additional","affiliation":[]},{"given":"L. L.","family":"Kurchaninov","sequence":"additional","affiliation":[]},{"given":"Y. A.","family":"Kurochkin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kurova","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Kurth","sequence":"additional","affiliation":[]},{"given":"E. S.","family":"Kuwertz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kuze","sequence":"additional","affiliation":[]},{"given":"A. 
K.","family":"Kvam","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kvita","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kwan","sequence":"additional","affiliation":[]},{"given":"L.","family":"La Rotonda","sequence":"additional","affiliation":[]},{"given":"F.","family":"La Ruffa","sequence":"additional","affiliation":[]},{"given":"C.","family":"Lacasta","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lacava","sequence":"additional","affiliation":[]},{"given":"D. P. J.","family":"Lack","sequence":"additional","affiliation":[]},{"given":"H.","family":"Lacker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lacour","sequence":"additional","affiliation":[]},{"given":"E.","family":"Ladygin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Lafaye","sequence":"additional","affiliation":[]},{"given":"B.","family":"Laforge","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lagouri","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lai","sequence":"additional","affiliation":[]},{"given":"I. K.","family":"Lakomiec","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lammers","sequence":"additional","affiliation":[]},{"given":"W.","family":"Lampl","sequence":"additional","affiliation":[]},{"given":"C.","family":"Lampoudis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lan\u00e7on","sequence":"additional","affiliation":[]},{"given":"U.","family":"Landgraf","sequence":"additional","affiliation":[]},{"given":"M. P. J.","family":"Landon","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Lanfermann","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Lang","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Lange","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Langenberg","sequence":"additional","affiliation":[]},{"given":"A. 
J.","family":"Lankford","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lanni","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lantzsch","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lanza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lapertosa","sequence":"additional","affiliation":[]},{"given":"S.","family":"Laplace","sequence":"additional","affiliation":[]},{"given":"J. F.","family":"Laporte","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lari","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lasagni Manghi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lassnig","sequence":"additional","affiliation":[]},{"given":"T. S.","family":"Lau","sequence":"additional","affiliation":[]},{"given":"A.","family":"Laudrain","sequence":"additional","affiliation":[]},{"given":"A.","family":"Laurier","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lavorgna","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Lawlor","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lazzaroni","sequence":"additional","affiliation":[]},{"given":"B.","family":"Le","sequence":"additional","affiliation":[]},{"given":"E.","family":"Le Guirriec","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lebedev","sequence":"additional","affiliation":[]},{"given":"M.","family":"LeBlanc","sequence":"additional","affiliation":[]},{"given":"T.","family":"LeCompte","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ledroit-Guillon","sequence":"additional","affiliation":[]},{"given":"A. C. A.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"G. R.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"L.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"S. 
C.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"B.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"H. P.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leggett","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lehmann","sequence":"additional","affiliation":[]},{"given":"N.","family":"Lehmann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Lehmann Miotto","sequence":"additional","affiliation":[]},{"given":"W. A.","family":"Leight","sequence":"additional","affiliation":[]},{"given":"A.","family":"Leisos","sequence":"additional","affiliation":[]},{"given":"M. A. L.","family":"Leite","sequence":"additional","affiliation":[]},{"given":"C. E.","family":"Leitgeb","sequence":"additional","affiliation":[]},{"given":"R.","family":"Leitner","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lellouch","sequence":"additional","affiliation":[]},{"given":"K. J. C.","family":"Leney","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lenz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Leone","sequence":"additional","affiliation":[]},{"given":"S.","family":"Leone","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leonidopoulos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Leopold","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leroy","sequence":"additional","affiliation":[]},{"given":"R.","family":"Les","sequence":"additional","affiliation":[]},{"given":"C. 
G.","family":"Lester","sequence":"additional","affiliation":[]},{"given":"M.","family":"Levchenko","sequence":"additional","affiliation":[]},{"given":"J.","family":"Lev\u00eaque","sequence":"additional","affiliation":[]},{"given":"D.","family":"Levin","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Levinson","sequence":"additional","affiliation":[]},{"given":"D. J.","family":"Lewis","sequence":"additional","affiliation":[]},{"given":"B.","family":"Li","sequence":"additional","affiliation":[]},{"given":"B.","family":"Li","sequence":"additional","affiliation":[]},{"given":"C-Q.","family":"Li","sequence":"additional","affiliation":[]},{"given":"F.","family":"Li","sequence":"additional","affiliation":[]},{"given":"H.","family":"Li","sequence":"additional","affiliation":[]},{"given":"H.","family":"Li","sequence":"additional","affiliation":[]},{"given":"J.","family":"Li","sequence":"additional","affiliation":[]},{"given":"K.","family":"Li","sequence":"additional","affiliation":[]},{"given":"L.","family":"Li","sequence":"additional","affiliation":[]},{"given":"M.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Q. 
Y.","family":"Li","sequence":"additional","affiliation":[]},{"given":"S.","family":"Li","sequence":"additional","affiliation":[]},{"given":"X.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Liang","sequence":"additional","affiliation":[]},{"given":"B.","family":"Liberti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Liblong","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lie","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lim","sequence":"additional","affiliation":[]},{"given":"C. Y.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"T. H.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Linck","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Lindon","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Lionti","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lipeles","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lipniacka","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Liss","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lister","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Little","sequence":"additional","affiliation":[]},{"given":"B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"B. X.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"H. B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"J. K. 
K.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"K.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M. Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"P.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y. L.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y. W.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Livan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lleres","sequence":"additional","affiliation":[]},{"given":"J.","family":"Llorente Merino","sequence":"additional","affiliation":[]},{"given":"S. L.","family":"Lloyd","sequence":"additional","affiliation":[]},{"given":"C. Y.","family":"Lo","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Lobodzinska","sequence":"additional","affiliation":[]},{"given":"P.","family":"Loch","sequence":"additional","affiliation":[]},{"given":"S.","family":"Loffredo","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lohse","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lohwasser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lokajicek","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Long","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Long","sequence":"additional","affiliation":[]},{"given":"L.","family":"Longo","sequence":"additional","affiliation":[]},{"given":"K. A.","family":"Looper","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Lopez","sequence":"additional","affiliation":[]},{"given":"I. 
Lopez","family":"Paz","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lopez Solis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Lorenz","sequence":"additional","affiliation":[]},{"given":"N.","family":"Lorenzo Martinez","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Lory","sequence":"additional","affiliation":[]},{"given":"M.","family":"Losada","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"L\u00f6sel","sequence":"additional","affiliation":[]},{"given":"A.","family":"L\u00f6sle","sequence":"additional","affiliation":[]},{"given":"X.","family":"Lou","sequence":"additional","affiliation":[]},{"given":"X.","family":"Lou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lounis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Love","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Love","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Lozano Bahilo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Y. J.","family":"Lu","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Lubatti","sequence":"additional","affiliation":[]},{"given":"C.","family":"Luci","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lucotte","sequence":"additional","affiliation":[]},{"given":"C.","family":"Luedtke","sequence":"additional","affiliation":[]},{"given":"F.","family":"Luehring","sequence":"additional","affiliation":[]},{"given":"I.","family":"Luise","sequence":"additional","affiliation":[]},{"given":"L.","family":"Luminari","sequence":"additional","affiliation":[]},{"given":"B.","family":"Lund-Jensen","sequence":"additional","affiliation":[]},{"given":"M. 
S.","family":"Lutz","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lynn","sequence":"additional","affiliation":[]},{"given":"H.","family":"Lyons","sequence":"additional","affiliation":[]},{"given":"R.","family":"Lysak","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lytken","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lyu","sequence":"additional","affiliation":[]},{"given":"V.","family":"Lyubushkin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lyubushkina","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"L. L.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"G.","family":"Maccarrone","sequence":"additional","affiliation":[]},{"given":"A.","family":"Macchiolo","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Macdonald","sequence":"additional","affiliation":[]},{"given":"J.","family":"Machado Miguens","sequence":"additional","affiliation":[]},{"given":"D.","family":"Madaffari","sequence":"additional","affiliation":[]},{"given":"R.","family":"Madar","sequence":"additional","affiliation":[]},{"given":"W. F.","family":"Mader","sequence":"additional","affiliation":[]},{"given":"M.","family":"Madugoda Ralalage Don","sequence":"additional","affiliation":[]},{"given":"N.","family":"Madysa","sequence":"additional","affiliation":[]},{"given":"J.","family":"Maeda","sequence":"additional","affiliation":[]},{"given":"T.","family":"Maeno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Maerker","sequence":"additional","affiliation":[]},{"given":"V.","family":"Magerl","sequence":"additional","affiliation":[]},{"given":"N.","family":"Magini","sequence":"additional","affiliation":[]},{"given":"J.","family":"Magro","sequence":"additional","affiliation":[]},{"given":"D. 
J.","family":"Mahon","sequence":"additional","affiliation":[]},{"given":"C.","family":"Maidantchik","sequence":"additional","affiliation":[]},{"given":"T.","family":"Maier","sequence":"additional","affiliation":[]},{"given":"A.","family":"Maio","sequence":"additional","affiliation":[]},{"given":"K.","family":"Maj","sequence":"additional","affiliation":[]},{"given":"O.","family":"Majersky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Majewski","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Makida","sequence":"additional","affiliation":[]},{"given":"N.","family":"Makovec","sequence":"additional","affiliation":[]},{"given":"B.","family":"Malaescu","sequence":"additional","affiliation":[]},{"given":"Pa.","family":"Malecki","sequence":"additional","affiliation":[]},{"given":"V. P.","family":"Maleev","sequence":"additional","affiliation":[]},{"given":"F.","family":"Malek","sequence":"additional","affiliation":[]},{"given":"U.","family":"Mallik","sequence":"additional","affiliation":[]},{"given":"D.","family":"Malon","sequence":"additional","affiliation":[]},{"given":"C.","family":"Malone","sequence":"additional","affiliation":[]},{"given":"S.","family":"Maltezos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Malyukov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mamuzic","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mancini","sequence":"additional","affiliation":[]},{"given":"I.","family":"Mandi\u0107","sequence":"additional","affiliation":[]},{"given":"L.","family":"Manhaes de Andrade Filho","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Maniatis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Manjarres Ramos","sequence":"additional","affiliation":[]},{"given":"K. 
H.","family":"Mankinen","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mann","sequence":"additional","affiliation":[]},{"given":"A.","family":"Manousos","sequence":"additional","affiliation":[]},{"given":"B.","family":"Mansoulie","sequence":"additional","affiliation":[]},{"given":"I.","family":"Manthos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Manzoni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Marantis","sequence":"additional","affiliation":[]},{"given":"G.","family":"Marceca","sequence":"additional","affiliation":[]},{"given":"L.","family":"Marchese","sequence":"additional","affiliation":[]},{"given":"G.","family":"Marchiori","sequence":"additional","affiliation":[]},{"given":"M.","family":"Marcisovsky","sequence":"additional","affiliation":[]},{"given":"L.","family":"Marcoccia","sequence":"additional","affiliation":[]},{"given":"C.","family":"Marcon","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Marin Tobon","sequence":"additional","affiliation":[]},{"given":"M.","family":"Marjanovic","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Marshall","sequence":"additional","affiliation":[]},{"given":"M. U. F.","family":"Martensson","sequence":"additional","affiliation":[]},{"given":"S.","family":"Marti-Garcia","sequence":"additional","affiliation":[]},{"given":"C. B.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"V. J.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"B.","family":"Martin dit Latour","sequence":"additional","affiliation":[]},{"given":"L.","family":"Martinelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Martinez","sequence":"additional","affiliation":[]},{"given":"V. 
I.","family":"Martinez Outschoorn","sequence":"additional","affiliation":[]},{"given":"S.","family":"Martin-Haugh","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Martoiu","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Martyniuk","sequence":"additional","affiliation":[]},{"given":"A.","family":"Marzin","sequence":"additional","affiliation":[]},{"given":"S. R.","family":"Maschek","sequence":"additional","affiliation":[]},{"given":"L.","family":"Masetti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mashimo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Mashinistov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Masik","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Maslennikov","sequence":"additional","affiliation":[]},{"given":"L.","family":"Massa","sequence":"additional","affiliation":[]},{"given":"P.","family":"Massarotti","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mastrandrea","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mastroberardino","sequence":"additional","affiliation":[]},{"given":"T.","family":"Masubuchi","sequence":"additional","affiliation":[]},{"given":"D.","family":"Matakias","sequence":"additional","affiliation":[]},{"given":"A.","family":"Matic","sequence":"additional","affiliation":[]},{"given":"N.","family":"Matsuzawa","sequence":"additional","affiliation":[]},{"given":"P.","family":"M\u00e4ttig","sequence":"additional","affiliation":[]},{"given":"J.","family":"Maurer","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ma\u010dek","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Maximov","sequence":"additional","affiliation":[]},{"given":"R.","family":"Mazini","sequence":"additional","affiliation":[]},{"given":"I.","family":"Maznas","sequence":"additional","affiliation":[]},{"given":"S. 
M.","family":"Mazza","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Mc Kee","sequence":"additional","affiliation":[]},{"given":"T. G.","family":"McCarthy","sequence":"additional","affiliation":[]},{"given":"W. P.","family":"McCormack","sequence":"additional","affiliation":[]},{"given":"E. F.","family":"McDonald","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Mcfayden","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mchedlidze","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"McKay","sequence":"additional","affiliation":[]},{"given":"K. D.","family":"McLean","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"McMahon","sequence":"additional","affiliation":[]},{"given":"P. C.","family":"McNamara","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"McNicol","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"McPherson","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Mdhluli","sequence":"additional","affiliation":[]},{"given":"Z. A.","family":"Meadows","sequence":"additional","affiliation":[]},{"given":"S.","family":"Meehan","sequence":"additional","affiliation":[]},{"given":"T.","family":"Megy","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mehlhase","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mehta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Meideck","sequence":"additional","affiliation":[]},{"given":"B.","family":"Meirose","sequence":"additional","affiliation":[]},{"given":"D.","family":"Melini","sequence":"additional","affiliation":[]},{"given":"B. R.","family":"Mellado Garcia","sequence":"additional","affiliation":[]},{"given":"J. 
D.","family":"Mellenthin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Melo","sequence":"additional","affiliation":[]},{"given":"F.","family":"Meloni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Melzer","sequence":"additional","affiliation":[]},{"given":"S. B.","family":"Menary","sequence":"additional","affiliation":[]},{"given":"E. D.","family":"Mendes Gouveia","sequence":"additional","affiliation":[]},{"given":"L.","family":"Meng","sequence":"additional","affiliation":[]},{"given":"X. T.","family":"Meng","sequence":"additional","affiliation":[]},{"given":"S.","family":"Menke","sequence":"additional","affiliation":[]},{"given":"E.","family":"Meoni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mergelmeyer","sequence":"additional","affiliation":[]},{"given":"S. A. M.","family":"Merkt","sequence":"additional","affiliation":[]},{"given":"C.","family":"Merlassino","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mermod","sequence":"additional","affiliation":[]},{"given":"L.","family":"Merola","sequence":"additional","affiliation":[]},{"given":"C.","family":"Meroni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Merz","sequence":"additional","affiliation":[]},{"given":"O.","family":"Meshkov","sequence":"additional","affiliation":[]},{"given":"J. K. R.","family":"Meshreki","sequence":"additional","affiliation":[]},{"given":"A.","family":"Messina","sequence":"additional","affiliation":[]},{"given":"J.","family":"Metcalfe","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Mete","sequence":"additional","affiliation":[]},{"given":"C.","family":"Meyer","sequence":"additional","affiliation":[]},{"given":"J-P.","family":"Meyer","sequence":"additional","affiliation":[]},{"given":"H.","family":"Meyer Zu Theenhausen","sequence":"additional","affiliation":[]},{"given":"F.","family":"Miano","sequence":"additional","affiliation":[]},{"given":"M.","family":"Michetti","sequence":"additional","affiliation":[]},{"given":"R. P.","family":"Middleton","sequence":"additional","affiliation":[]},{"given":"L.","family":"Mijovi\u0107","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mikenberg","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mikestikova","sequence":"additional","affiliation":[]},{"given":"M.","family":"Miku\u017e","sequence":"additional","affiliation":[]},{"given":"H.","family":"Mildner","sequence":"additional","affiliation":[]},{"given":"M.","family":"Milesi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Milic","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Milke","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Millar","sequence":"additional","affiliation":[]},{"given":"D. W.","family":"Miller","sequence":"additional","affiliation":[]},{"given":"A.","family":"Milov","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Milstead","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Mina","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Minaenko","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mi\u00f1ano Moya","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Minashvili","sequence":"additional","affiliation":[]},{"given":"A. 
I.","family":"Mincer","sequence":"additional","affiliation":[]},{"given":"B.","family":"Mindur","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mineev","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Minegishi","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Mir","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mirto","sequence":"additional","affiliation":[]},{"given":"K. P.","family":"Mistry","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mitani","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mitrevski","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Mitsou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mittal","sequence":"additional","affiliation":[]},{"given":"O.","family":"Miu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Miucci","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Miyagawa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mizukami","sequence":"additional","affiliation":[]},{"given":"J. U.","family":"Mj\u00f6rnmark","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mkrtchyan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mlynarikova","sequence":"additional","affiliation":[]},{"given":"T.","family":"Moa","sequence":"additional","affiliation":[]},{"given":"K.","family":"Mochizuki","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mogg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mohapatra","sequence":"additional","affiliation":[]},{"given":"R.","family":"Moles-Valls","sequence":"additional","affiliation":[]},{"given":"M. 
C.","family":"Mondragon","sequence":"additional","affiliation":[]},{"given":"K.","family":"M\u00f6nig","sequence":"additional","affiliation":[]},{"given":"J.","family":"Monk","sequence":"additional","affiliation":[]},{"given":"E.","family":"Monnier","sequence":"additional","affiliation":[]},{"given":"A.","family":"Montalbano","sequence":"additional","affiliation":[]},{"given":"J.","family":"Montejo Berlingen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Montella","sequence":"additional","affiliation":[]},{"given":"F.","family":"Monticelli","sequence":"additional","affiliation":[]},{"given":"N.","family":"Morange","sequence":"additional","affiliation":[]},{"given":"D.","family":"Moreno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Moreno Ll\u00e1cer","sequence":"additional","affiliation":[]},{"given":"C.","family":"Moreno Martinez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Morettini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morgenstern","sequence":"additional","affiliation":[]},{"given":"S.","family":"Morgenstern","sequence":"additional","affiliation":[]},{"given":"D.","family":"Mori","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morii","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morinaga","sequence":"additional","affiliation":[]},{"given":"V.","family":"Morisbak","sequence":"additional","affiliation":[]},{"given":"A. K.","family":"Morley","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mornacchi","sequence":"additional","affiliation":[]},{"given":"A. 
P.","family":"Morris","sequence":"additional","affiliation":[]},{"given":"L.","family":"Morvaj","sequence":"additional","affiliation":[]},{"given":"P.","family":"Moschovakos","sequence":"additional","affiliation":[]},{"given":"B.","family":"Moser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mosidze","sequence":"additional","affiliation":[]},{"given":"T.","family":"Moskalets","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Moss","sequence":"additional","affiliation":[]},{"given":"J.","family":"Moss","sequence":"additional","affiliation":[]},{"given":"E. J. W.","family":"Moyse","sequence":"additional","affiliation":[]},{"given":"S.","family":"Muanza","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mueller","sequence":"additional","affiliation":[]},{"given":"R. S. P.","family":"Mueller","sequence":"additional","affiliation":[]},{"given":"D.","family":"Muenstermann","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Mullier","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Mungo","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Munoz Martinez","sequence":"additional","affiliation":[]},{"given":"F. J.","family":"Munoz Sanchez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Murin","sequence":"additional","affiliation":[]},{"given":"W. J.","family":"Murray","sequence":"additional","affiliation":[]},{"given":"A.","family":"Murrone","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mu\u0161kinja","sequence":"additional","affiliation":[]},{"given":"kinja C.","family":"Mwewa","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Myagkov","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Myers","sequence":"additional","affiliation":[]},{"given":"J.","family":"Myers","sequence":"additional","affiliation":[]},{"given":"M.","family":"Myska","sequence":"additional","affiliation":[]},{"given":"B. 
P.","family":"Nachman","sequence":"additional","affiliation":[]},{"given":"O.","family":"Nackenhorst","sequence":"additional","affiliation":[]},{"given":"A. Nag","family":"Nag","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nagai","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nagano","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Nagasaka","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Nagle","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nagy","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Nairz","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Nakahama","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nakamura","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nakamura","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nakano","sequence":"additional","affiliation":[]},{"given":"H.","family":"Nanjo","sequence":"additional","affiliation":[]},{"given":"F.","family":"Napolitano","sequence":"additional","affiliation":[]},{"given":"R. F.","family":"Naranjo Garcia","sequence":"additional","affiliation":[]},{"given":"R.","family":"Narayan","sequence":"additional","affiliation":[]},{"given":"I.","family":"Naryshkin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Naumann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Navarro","sequence":"additional","affiliation":[]},{"given":"P. Y.","family":"Nechaeva","sequence":"additional","affiliation":[]},{"given":"F.","family":"Nechansky","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Neep","sequence":"additional","affiliation":[]},{"given":"A.","family":"Negri","sequence":"additional","affiliation":[]},{"given":"M.","family":"Negrini","sequence":"additional","affiliation":[]},{"given":"C.","family":"Nellist","sequence":"additional","affiliation":[]},{"given":"M. 
E.","family":"Nelson","sequence":"additional","affiliation":[]},{"given":"S.","family":"Nemecek","sequence":"additional","affiliation":[]},{"given":"M.","family":"Nessi","sequence":"additional","affiliation":[]},{"given":"M. S.","family":"Neubauer","sequence":"additional","affiliation":[]},{"given":"F.","family":"Neuhaus","sequence":"additional","affiliation":[]},{"given":"M.","family":"Neumann","sequence":"additional","affiliation":[]},{"given":"R.","family":"Newhouse","sequence":"additional","affiliation":[]},{"given":"P. R.","family":"Newman","sequence":"additional","affiliation":[]},{"given":"C. W.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Y. S.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Y. W. Y.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ngair","sequence":"additional","affiliation":[]},{"given":"H. D. N.","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nguyen Manh","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nibigira","sequence":"additional","affiliation":[]},{"given":"R. B.","family":"Nickerson","sequence":"additional","affiliation":[]},{"given":"R.","family":"Nicolaidou","sequence":"additional","affiliation":[]},{"given":"D. S.","family":"Nielsen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Nielsen","sequence":"additional","affiliation":[]},{"given":"N.","family":"Nikiforou","sequence":"additional","affiliation":[]},{"given":"V.","family":"Nikolaenko","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nikolic-Audit","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nikolopoulos","sequence":"additional","affiliation":[]},{"given":"P.","family":"Nilsson","sequence":"additional","affiliation":[]},{"given":"H. 
R.","family":"Nindhito","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ninomiya","sequence":"additional","affiliation":[]},{"given":"A.","family":"Nisati","sequence":"additional","affiliation":[]},{"given":"N.","family":"Nishu","sequence":"additional","affiliation":[]},{"given":"R.","family":"Nisius","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nitsche","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nitta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nobe","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Noguchi","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nomidis","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Nomura","sequence":"additional","affiliation":[]},{"given":"M.","family":"Nordberg","sequence":"additional","affiliation":[]},{"given":"T.","family":"Novak","sequence":"additional","affiliation":[]},{"given":"O.","family":"Novgorodova","sequence":"additional","affiliation":[]},{"given":"R.","family":"Novotny","sequence":"additional","affiliation":[]},{"given":"L.","family":"Nozka","sequence":"additional","affiliation":[]},{"given":"K.","family":"Ntekas","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nurse","sequence":"additional","affiliation":[]},{"given":"F. G.","family":"Oakham","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oberlack","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ocariz","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ochi","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ochoa","sequence":"additional","affiliation":[]},{"given":"J. 
P.","family":"Ochoa-Ricoux","sequence":"additional","affiliation":[]},{"given":"K.","family":"O\u2019Connor","sequence":"additional","affiliation":[]},{"given":"S.","family":"Oda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Odaka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Oerdek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ogrodnik","sequence":"additional","affiliation":[]},{"given":"A.","family":"Oh","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Oh","sequence":"additional","affiliation":[]},{"given":"C. C.","family":"Ohm","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oide","sequence":"additional","affiliation":[]},{"given":"M. L.","family":"Ojeda","sequence":"additional","affiliation":[]},{"given":"H.","family":"Okawa","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Okazaki","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"O\u2019Keefe","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Okumura","sequence":"additional","affiliation":[]},{"given":"T.","family":"Okuyama","sequence":"additional","affiliation":[]},{"given":"A.","family":"Olariu","sequence":"additional","affiliation":[]},{"given":"L. F.","family":"Oleiro Seabra","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Olivares Pino","sequence":"additional","affiliation":[]},{"given":"D.","family":"Oliveira Damazio","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Oliver","sequence":"additional","affiliation":[]},{"given":"M. J. R.","family":"Olsson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Olszewski","sequence":"additional","affiliation":[]},{"given":"J.","family":"Olszowska","sequence":"additional","affiliation":[]},{"given":"D. C.","family":"O\u2019Neil","sequence":"additional","affiliation":[]},{"given":"A. 
P.","family":"O\u2019neill","sequence":"additional","affiliation":[]},{"given":"A.","family":"Onofre","sequence":"additional","affiliation":[]},{"given":"P. U. E.","family":"Onyisi","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oppen","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Oreglia","sequence":"additional","affiliation":[]},{"given":"G. E.","family":"Orellana","sequence":"additional","affiliation":[]},{"given":"D.","family":"Orestano","sequence":"additional","affiliation":[]},{"given":"N.","family":"Orlando","sequence":"additional","affiliation":[]},{"given":"R. S.","family":"Orr","sequence":"additional","affiliation":[]},{"given":"V.","family":"O\u2019Shea","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ospanov","sequence":"additional","affiliation":[]},{"given":"G.","family":"Otero y Garzon","sequence":"additional","affiliation":[]},{"given":"H.","family":"Otono","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Ott","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ouchrif","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ouellette","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ould-Saada","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ouraou","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Ouyang","sequence":"additional","affiliation":[]},{"given":"M.","family":"Owen","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Owen","sequence":"additional","affiliation":[]},{"given":"V. E.","family":"Ozcan","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ozturk","sequence":"additional","affiliation":[]},{"given":"J.","family":"Pacalt","sequence":"additional","affiliation":[]},{"given":"H. 
A.","family":"Pacey","sequence":"additional","affiliation":[]},{"given":"K.","family":"Pachal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Pacheco Pages","sequence":"additional","affiliation":[]},{"given":"C.","family":"Padilla Aranda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pagan Griso","sequence":"additional","affiliation":[]},{"given":"M.","family":"Paganini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Palacino","sequence":"additional","affiliation":[]},{"given":"S.","family":"Palazzo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Palestini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Palka","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pallin","sequence":"additional","affiliation":[]},{"given":"P.","family":"Palni","sequence":"additional","affiliation":[]},{"given":"I.","family":"Panagoulias","sequence":"additional","affiliation":[]},{"given":"C. E.","family":"Pandini","sequence":"additional","affiliation":[]},{"given":"J. G.","family":"Panduro Vazquez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Panizzo","sequence":"additional","affiliation":[]},{"given":"L.","family":"Paolozzi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Papadatos","sequence":"additional","affiliation":[]},{"given":"K.","family":"Papageorgiou","sequence":"additional","affiliation":[]},{"given":"S.","family":"Parajuli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Paramonov","sequence":"additional","affiliation":[]},{"given":"D.","family":"Paredes Hernandez","sequence":"additional","affiliation":[]},{"given":"S. R.","family":"Paredes Saenz","sequence":"additional","affiliation":[]},{"given":"B.","family":"Parida","sequence":"additional","affiliation":[]},{"given":"T. 
H.","family":"Park","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Parker","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Parker","sequence":"additional","affiliation":[]},{"given":"F.","family":"Parodi","sequence":"additional","affiliation":[]},{"given":"E. W.","family":"Parrish","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Parsons","sequence":"additional","affiliation":[]},{"given":"U.","family":"Parzefall","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pascual Dominguez","sequence":"additional","affiliation":[]},{"given":"V. R.","family":"Pascuzzi","sequence":"additional","affiliation":[]},{"given":"J. M. P.","family":"Pasner","sequence":"additional","affiliation":[]},{"given":"F.","family":"Pasquali","sequence":"additional","affiliation":[]},{"given":"E.","family":"Pasqualucci","sequence":"additional","affiliation":[]},{"given":"S.","family":"Passaggio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Pastore","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pasuwan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pataraia","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Pater","sequence":"additional","affiliation":[]},{"given":"A.","family":"Pathak","sequence":"additional","affiliation":[]},{"given":"J.","family":"Patton","sequence":"additional","affiliation":[]},{"given":"T.","family":"Pauly","sequence":"additional","affiliation":[]},{"given":"J.","family":"Pearkes","sequence":"additional","affiliation":[]},{"given":"B.","family":"Pearson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pedersen","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pedraza Diaz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Pedro","sequence":"additional","affiliation":[]},{"given":"T.","family":"Peiffer","sequence":"additional","affiliation":[]},{"given":"S. 
V.","family":"Peleganchuk","sequence":"additional","affiliation":[]},{"given":"O.","family":"Penc","sequence":"additional","affiliation":[]},{"given":"H.","family":"Peng","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Peralva","sequence":"additional","affiliation":[]},{"given":"M. M.","family":"Perego","sequence":"additional","affiliation":[]},{"given":"A. P.","family":"Pereira Peixoto","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pereira Sanchez","sequence":"additional","affiliation":[]},{"given":"D. V.","family":"Perepelitsa","sequence":"additional","affiliation":[]},{"given":"F.","family":"Peri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Perini","sequence":"additional","affiliation":[]},{"given":"H.","family":"Pernegger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Perrella","sequence":"additional","affiliation":[]},{"given":"A.","family":"Perrevoort","sequence":"additional","affiliation":[]},{"given":"K.","family":"Peters","sequence":"additional","affiliation":[]},{"given":"R. F. Y.","family":"Peters","sequence":"additional","affiliation":[]},{"given":"B. A.","family":"Petersen","sequence":"additional","affiliation":[]},{"given":"T. C.","family":"Petersen","sequence":"additional","affiliation":[]},{"given":"E.","family":"Petit","sequence":"additional","affiliation":[]},{"given":"A.","family":"Petridis","sequence":"additional","affiliation":[]},{"given":"C.","family":"Petridou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Petrov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Petrucci","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pettee","sequence":"additional","affiliation":[]},{"given":"N. 
E.","family":"Pettersson","sequence":"additional","affiliation":[]},{"given":"K.","family":"Petukhova","sequence":"additional","affiliation":[]},{"given":"A.","family":"Peyaud","sequence":"additional","affiliation":[]},{"given":"R.","family":"Pezoa","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pezzotti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Pham","sequence":"additional","affiliation":[]},{"given":"F. H.","family":"Phillips","sequence":"additional","affiliation":[]},{"given":"P. W.","family":"Phillips","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"Phipps","sequence":"additional","affiliation":[]},{"given":"G.","family":"Piacquadio","sequence":"additional","affiliation":[]},{"given":"E.","family":"Pianori","sequence":"additional","affiliation":[]},{"given":"A.","family":"Picazio","sequence":"additional","affiliation":[]},{"given":"R. H.","family":"Pickles","sequence":"additional","affiliation":[]},{"given":"R.","family":"Piegaia","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pietreanu","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Pilcher","sequence":"additional","affiliation":[]},{"given":"A. D.","family":"Pilkington","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pinamonti","sequence":"additional","affiliation":[]},{"given":"J. 
L.","family":"Pinfold","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pitt","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pizzimento","sequence":"additional","affiliation":[]},{"given":"M.-A.","family":"Pleier","sequence":"additional","affiliation":[]},{"given":"V.","family":"Pleskot","sequence":"additional","affiliation":[]},{"given":"E.","family":"Plotnikova","sequence":"additional","affiliation":[]},{"given":"P.","family":"Podberezko","sequence":"additional","affiliation":[]},{"given":"R.","family":"Poettgen","sequence":"additional","affiliation":[]},{"given":"R.","family":"Poggi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Poggioli","sequence":"additional","affiliation":[]},{"given":"I.","family":"Pogrebnyak","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pohl","sequence":"additional","affiliation":[]},{"given":"I.","family":"Pokharel","sequence":"additional","affiliation":[]},{"given":"G.","family":"Polesello","sequence":"additional","affiliation":[]},{"given":"A.","family":"Poley","sequence":"additional","affiliation":[]},{"given":"A.","family":"Policicchio","sequence":"additional","affiliation":[]},{"given":"R.","family":"Polifka","sequence":"additional","affiliation":[]},{"given":"A.","family":"Polini","sequence":"additional","affiliation":[]},{"given":"C. S.","family":"Pollard","sequence":"additional","affiliation":[]},{"given":"V.","family":"Polychronakos","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ponomarenko","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pontecorvo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Popa","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Popeneciu","sequence":"additional","affiliation":[]},{"given":"L.","family":"Portales","sequence":"additional","affiliation":[]},{"given":"D. 
M.","family":"Portillo Quintero","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pospisil","sequence":"additional","affiliation":[]},{"given":"K.","family":"Potamianos","sequence":"additional","affiliation":[]},{"given":"I. N.","family":"Potrap","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Potter","sequence":"additional","affiliation":[]},{"given":"H.","family":"Potti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Poulsen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Poveda","sequence":"additional","affiliation":[]},{"given":"T. D.","family":"Powell","sequence":"additional","affiliation":[]},{"given":"G.","family":"Pownall","sequence":"additional","affiliation":[]},{"given":"M. E.","family":"Pozo Astigarraga","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pralavorio","sequence":"additional","affiliation":[]},{"given":"S.","family":"Prell","sequence":"additional","affiliation":[]},{"given":"D.","family":"Price","sequence":"additional","affiliation":[]},{"given":"M.","family":"Primavera","sequence":"additional","affiliation":[]},{"given":"S.","family":"Prince","sequence":"additional","affiliation":[]},{"given":"M. 
L.","family":"Proffitt","sequence":"additional","affiliation":[]},{"given":"N.","family":"Proklova","sequence":"additional","affiliation":[]},{"given":"K.","family":"Prokofiev","sequence":"additional","affiliation":[]},{"given":"F.","family":"Prokoshin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Protopopescu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Proudfoot","sequence":"additional","affiliation":[]},{"given":"M.","family":"Przybycien","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pudzha","sequence":"additional","affiliation":[]},{"given":"A.","family":"Puri","sequence":"additional","affiliation":[]},{"given":"P.","family":"Puzo","sequence":"additional","affiliation":[]},{"given":"J.","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Qin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Quadt","sequence":"additional","affiliation":[]},{"given":"M.","family":"Queitsch-Maitland","sequence":"additional","affiliation":[]},{"given":"A.","family":"Qureshi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Racko","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ragusa","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rahal","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Raine","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rajagopalan","sequence":"additional","affiliation":[]},{"given":"A. Ramirez","family":"Morales","sequence":"additional","affiliation":[]},{"given":"K.","family":"Ran","sequence":"additional","affiliation":[]},{"given":"T.","family":"Rashid","sequence":"additional","affiliation":[]},{"given":"S.","family":"Raspopov","sequence":"additional","affiliation":[]},{"given":"D. 
M.","family":"Rauch","sequence":"additional","affiliation":[]},{"given":"F.","family":"Rauscher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rave","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ravina","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ravinovich","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Rawling","sequence":"additional","affiliation":[]},{"given":"M.","family":"Raymond","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Read","sequence":"additional","affiliation":[]},{"given":"N. P.","family":"Readioff","sequence":"additional","affiliation":[]},{"given":"M.","family":"Reale","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Rebuzzi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Redelbach","sequence":"additional","affiliation":[]},{"given":"G.","family":"Redlinger","sequence":"additional","affiliation":[]},{"given":"K.","family":"Reeves","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rehnisch","sequence":"additional","affiliation":[]},{"given":"J.","family":"Reichert","sequence":"additional","affiliation":[]},{"given":"D.","family":"Reikher","sequence":"additional","affiliation":[]},{"given":"A.","family":"Reiss","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rej","sequence":"additional","affiliation":[]},{"given":"C.","family":"Rembser","sequence":"additional","affiliation":[]},{"given":"A.","family":"Renardi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Renda","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rescigno","sequence":"additional","affiliation":[]},{"given":"S.","family":"Resconi","sequence":"additional","affiliation":[]},{"given":"E. 
D.","family":"Resseguie","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rettie","sequence":"additional","affiliation":[]},{"given":"B.","family":"Reynolds","sequence":"additional","affiliation":[]},{"given":"E.","family":"Reynolds","sequence":"additional","affiliation":[]},{"given":"O. L.","family":"Rezanova","sequence":"additional","affiliation":[]},{"given":"P.","family":"Reznicek","sequence":"additional","affiliation":[]},{"given":"E.","family":"Ricci","sequence":"additional","affiliation":[]},{"given":"R.","family":"Richter","sequence":"additional","affiliation":[]},{"given":"S.","family":"Richter","sequence":"additional","affiliation":[]},{"given":"E.","family":"Richter-Was","sequence":"additional","affiliation":[]},{"given":"O.","family":"Ricken","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ridel","sequence":"additional","affiliation":[]},{"given":"P.","family":"Rieck","sequence":"additional","affiliation":[]},{"given":"O.","family":"Rifki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rijssenbeek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rimoldi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rimoldi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rinaldi","sequence":"additional","affiliation":[]},{"given":"G.","family":"Ripellino","sequence":"additional","affiliation":[]},{"given":"I.","family":"Riu","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Rivera Vergara","sequence":"additional","affiliation":[]},{"given":"F.","family":"Rizatdinova","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rizvi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Rizzi","sequence":"additional","affiliation":[]},{"given":"R. T.","family":"Roberts","sequence":"additional","affiliation":[]},{"given":"S. 
H.","family":"Robertson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Robin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Robinson","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Robles Gajardo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Robles Manzano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Robson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rocchi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rocco","sequence":"additional","affiliation":[]},{"given":"C.","family":"Roda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rodriguez Bosca","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rodriguez Perez","sequence":"additional","affiliation":[]},{"given":"D.","family":"Rodriguez Rodriguez","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Rodr\u00edguez Vera","sequence":"additional","affiliation":[]},{"given":"S.","family":"Roe","sequence":"additional","affiliation":[]},{"given":"O.","family":"R\u00f8hne","sequence":"additional","affiliation":[]},{"given":"R.","family":"R\u00f6hrig","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Rojas","sequence":"additional","affiliation":[]},{"given":"B.","family":"Roland","sequence":"additional","affiliation":[]},{"given":"C. P. 
A.","family":"Roland","sequence":"additional","affiliation":[]},{"given":"J.","family":"Roloff","sequence":"additional","affiliation":[]},{"given":"A.","family":"Romaniouk","sequence":"additional","affiliation":[]},{"given":"M.","family":"Romano","sequence":"additional","affiliation":[]},{"given":"N.","family":"Rompotis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ronzani","sequence":"additional","affiliation":[]},{"given":"L.","family":"Roos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rosati","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rosin","sequence":"additional","affiliation":[]},{"given":"B. J.","family":"Rosser","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"L. P.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rossini","sequence":"additional","affiliation":[]},{"given":"R.","family":"Rosten","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rotaru","sequence":"additional","affiliation":[]},{"given":"J.","family":"Rothberg","sequence":"additional","affiliation":[]},{"given":"B.","family":"Rottler","sequence":"additional","affiliation":[]},{"given":"D.","family":"Rousseau","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rovelli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Roy","sequence":"additional","affiliation":[]},{"given":"D.","family":"Roy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rozanov","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Rozen","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ruan","sequence":"additional","affiliation":[]},{"given":"F.","family":"R\u00fchr","sequence":"additional","affiliation":[]},{"giv
en":"A.","family":"Ruiz-Martinez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rummler","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Rurikova","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Rusakovich","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Russell","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rustige","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Rutherfoord","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"R\u00fcttinger","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rybar","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rybkin","sequence":"additional","affiliation":[]},{"given":"E. B.","family":"Rye","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ryzhov","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Sabater Iglesias","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sabatini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sabato","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sacerdoti","sequence":"additional","affiliation":[]},{"given":"H. 
F-W.","family":"Sadrozinski","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sadykov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Safai Tehrani","sequence":"additional","affiliation":[]},{"given":"B.","family":"Safarzadeh Samani","sequence":"additional","affiliation":[]},{"given":"M.","family":"Safdari","sequence":"additional","affiliation":[]},{"given":"P.","family":"Saha","sequence":"additional","affiliation":[]},{"given":"S.","family":"Saha","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sahinsoy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sahu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Saimpert","sequence":"additional","affiliation":[]},{"given":"M.","family":"Saito","sequence":"additional","affiliation":[]},{"given":"T.","family":"Saito","sequence":"additional","affiliation":[]},{"given":"H.","family":"Sakamoto","sequence":"additional","affiliation":[]},{"given":"D.","family":"Salamani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Salamanna","sequence":"additional","affiliation":[]},{"given":"J. 
E.","family":"Salazar Loyola","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salnikov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Salt","sequence":"additional","affiliation":[]},{"given":"D.","family":"Salvatore","sequence":"additional","affiliation":[]},{"given":"F.","family":"Salvatore","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salvucci","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salzburger","sequence":"additional","affiliation":[]},{"given":"J.","family":"Samarati","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sammel","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sampsonidis","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sampsonidou","sequence":"additional","affiliation":[]},{"given":"J.","family":"S\u00e1nchez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sanchez Pineda","sequence":"additional","affiliation":[]},{"given":"H.","family":"Sandaker","sequence":"additional","affiliation":[]},{"given":"C. O.","family":"Sander","sequence":"additional","affiliation":[]},{"given":"I. G.","family":"Sanderswood","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sandhoff","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sandoval","sequence":"additional","affiliation":[]},{"given":"D. P. C.","family":"Sankey","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sannino","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Sano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sansoni","sequence":"additional","affiliation":[]},{"given":"C.","family":"Santoni","sequence":"additional","affiliation":[]},{"given":"H.","family":"Santos","sequence":"additional","affiliation":[]},{"given":"S. 
N.","family":"Santpur","sequence":"additional","affiliation":[]},{"given":"A.","family":"Santra","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sapronov","sequence":"additional","affiliation":[]},{"given":"J. G.","family":"Saraiva","sequence":"additional","affiliation":[]},{"given":"J.","family":"Sardain","sequence":"additional","affiliation":[]},{"given":"O.","family":"Sasaki","sequence":"additional","affiliation":[]},{"given":"K.","family":"Sato","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sauerburger","sequence":"additional","affiliation":[]},{"given":"E.","family":"Sauvan","sequence":"additional","affiliation":[]},{"given":"P.","family":"Savard","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sawada","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sawyer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Sawyer","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sbarra","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sbrizzi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Scanlon","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schaarschmidt","sequence":"additional","affiliation":[]},{"given":"P.","family":"Schacht","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Schachtner","sequence":"additional","affiliation":[]},{"given":"D.","family":"Schaefer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Schaefer","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schaeffer","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schaepe","sequence":"additional","affiliation":[]},{"given":"U.","family":"Sch\u00e4fer","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Schaffer","sequence":"additional","affiliation":[]},{"given":"D.","family":"Schaile","sequence":"additional","affiliation":[]},{"given":"R. 
D.","family":"Schamberger","sequence":"additional","affiliation":[]},{"given":"N.","family":"Scharmberg","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Schegelsky","sequence":"additional","affiliation":[]},{"given":"D.","family":"Scheirich","sequence":"additional","affiliation":[]},{"given":"F.","family":"Schenck","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schernau","sequence":"additional","affiliation":[]},{"given":"C.","family":"Schiavi","sequence":"additional","affiliation":[]},{"given":"L. K.","family":"Schildgen","sequence":"additional","affiliation":[]},{"given":"Z. M.","family":"Schillaci","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Schioppa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schioppa","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Schleicher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schlenker","sequence":"additional","affiliation":[]},{"given":"K. R.","family":"Schmidt-Sommerfeld","sequence":"additional","affiliation":[]},{"given":"K.","family":"Schmieden","sequence":"additional","affiliation":[]},{"given":"C.","family":"Schmitt","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schmitt","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schmitz","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Schmoeckel","sequence":"additional","affiliation":[]},{"given":"L.","family":"Schoeffel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schoening","sequence":"additional","affiliation":[]},{"given":"P. G.","family":"Scholer","sequence":"additional","affiliation":[]},{"given":"E.","family":"Schopf","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schott","sequence":"additional","affiliation":[]},{"given":"J. F. 
P.","family":"Schouwenberg","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schovancova","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schramm","sequence":"additional","affiliation":[]},{"given":"F.","family":"Schroeder","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schulte","sequence":"additional","affiliation":[]},{"given":"H-C.","family":"Schultz-Coulon","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schumacher","sequence":"additional","affiliation":[]},{"given":"B. A.","family":"Schumm","sequence":"additional","affiliation":[]},{"given":"Ph.","family":"Schune","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schwartzman","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Schwarz","sequence":"additional","affiliation":[]},{"given":"Ph.","family":"Schwemling","sequence":"additional","affiliation":[]},{"given":"R.","family":"Schwienhorst","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sciandra","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sciolla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Scodeggio","sequence":"additional","affiliation":[]},{"given":"M.","family":"Scornajenghi","sequence":"additional","affiliation":[]},{"given":"F.","family":"Scuri","sequence":"additional","affiliation":[]},{"given":"F.","family":"Scutti","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Scyboz","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Sebastiani","sequence":"additional","affiliation":[]},{"given":"P.","family":"Seema","sequence":"additional","affiliation":[]},{"given":"S. C.","family":"Seidel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Seiden","sequence":"additional","affiliation":[]},{"given":"B. 
D.","family":"Seidlitz","sequence":"additional","affiliation":[]},{"given":"T.","family":"Seiss","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Seixas","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sekhniaidze","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Sekula","sequence":"additional","affiliation":[]},{"given":"N.","family":"Semprini-Cesari","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Serfon","sequence":"additional","affiliation":[]},{"given":"L.","family":"Serin","sequence":"additional","affiliation":[]},{"given":"L.","family":"Serkin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sessa","sequence":"additional","affiliation":[]},{"given":"H.","family":"Severini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sevova","sequence":"additional","affiliation":[]},{"given":"T.","family":"\u0160filigoj","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sforza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sfyrla","sequence":"additional","affiliation":[]},{"given":"E.","family":"Shabalina","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Shahinian","sequence":"additional","affiliation":[]},{"given":"N. W.","family":"Shaikh","sequence":"additional","affiliation":[]},{"given":"D.","family":"Shaked Renous","sequence":"additional","affiliation":[]},{"given":"L. Y.","family":"Shan","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Shank","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shapiro","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"P. 
B.","family":"Shatalov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Shaw","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Shaw","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shehade","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Shen","sequence":"additional","affiliation":[]},{"given":"A. D.","family":"Sherman","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sherwood","sequence":"additional","affiliation":[]},{"given":"L.","family":"Shi","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shimizu","sequence":"additional","affiliation":[]},{"given":"C. O.","family":"Shimmin","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Shimogama","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shimojima","sequence":"additional","affiliation":[]},{"given":"I. P. J.","family":"Shipsey","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shirabe","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shiyakova","sequence":"additional","affiliation":[]},{"given":"J.","family":"Shlomi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Shmeleva","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Shochet","sequence":"additional","affiliation":[]},{"given":"J.","family":"Shojaii","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Shope","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shrestha","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Shrif","sequence":"additional","affiliation":[]},{"given":"E.","family":"Shulga","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sicho","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Sickles","sequence":"additional","affiliation":[]},{"given":"P. 
E.","family":"Sidebo","sequence":"additional","affiliation":[]},{"given":"E.","family":"Sideras Haddad","sequence":"additional","affiliation":[]},{"given":"O.","family":"Sidiropoulou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sidoti","sequence":"additional","affiliation":[]},{"given":"F.","family":"Siegert","sequence":"additional","affiliation":[]},{"given":"Dj.","family":"Sijacki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Silva","sequence":"additional","affiliation":[]},{"given":"M. V.","family":"Silva Oliveira","sequence":"additional","affiliation":[]},{"given":"S. B.","family":"Silverstein","sequence":"additional","affiliation":[]},{"given":"S.","family":"Simion","sequence":"additional","affiliation":[]},{"given":"R.","family":"Simoniello","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Simpson-allsop","sequence":"additional","affiliation":[]},{"given":"S.","family":"Simsek","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sinervo","sequence":"additional","affiliation":[]},{"given":"V.","family":"Sinetckii","sequence":"additional","affiliation":[]},{"given":"S.","family":"Singh","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sioli","sequence":"additional","affiliation":[]},{"given":"I.","family":"Siral","sequence":"additional","affiliation":[]},{"given":"S. Yu.","family":"Sivoklokov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Sj\u00f6lin","sequence":"additional","affiliation":[]},{"given":"E.","family":"Skorda","sequence":"additional","affiliation":[]},{"given":"P.","family":"Skubic","sequence":"additional","affiliation":[]},{"given":"M.","family":"Slawinska","sequence":"additional","affiliation":[]},{"given":"K.","family":"Sliwa","sequence":"additional","affiliation":[]},{"given":"R.","family":"Slovak","sequence":"additional","affiliation":[]},{"given":"V.","family":"Smakhtin","sequence":"additional","affiliation":[]},{"given":"B. 
H.","family":"Smart","sequence":"additional","affiliation":[]},{"given":"J.","family":"Smiesko","sequence":"additional","affiliation":[]},{"given":"N.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"S. Yu.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"L. N.","family":"Smirnova","sequence":"additional","affiliation":[]},{"given":"O.","family":"Smirnova","sequence":"additional","affiliation":[]},{"given":"J. W.","family":"Smith","sequence":"additional","affiliation":[]},{"given":"M.","family":"Smizanska","sequence":"additional","affiliation":[]},{"given":"K.","family":"Smolek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Smykiewicz","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Snesarev","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Snoek","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Snyder","sequence":"additional","affiliation":[]},{"given":"S.","family":"Snyder","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sobie","sequence":"additional","affiliation":[]},{"given":"A.","family":"Soffer","sequence":"additional","affiliation":[]},{"given":"A.","family":"S\u00f8gaard","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sohns","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Solans Sanchez","sequence":"additional","affiliation":[]},{"given":"E. Yu.","family":"Soldatov","sequence":"additional","affiliation":[]},{"given":"U.","family":"Soldevila","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Solodkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Soloshenko","sequence":"additional","affiliation":[]},{"given":"O. 
V.","family":"Solovyanov","sequence":"additional","affiliation":[]},{"given":"V.","family":"Solovyev","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sommer","sequence":"additional","affiliation":[]},{"given":"H.","family":"Son","sequence":"additional","affiliation":[]},{"given":"W.","family":"Song","sequence":"additional","affiliation":[]},{"given":"W. Y.","family":"Song","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sopczak","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Sopio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sopkova","sequence":"additional","affiliation":[]},{"given":"C. L.","family":"Sotiropoulou","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sottocornola","sequence":"additional","affiliation":[]},{"given":"R.","family":"Soualah","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Soukharev","sequence":"additional","affiliation":[]},{"given":"D.","family":"South","sequence":"additional","affiliation":[]},{"given":"S.","family":"Spagnolo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spalla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spangenberg","sequence":"additional","affiliation":[]},{"given":"F.","family":"Span\u00f2","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sperlich","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Spieker","sequence":"additional","affiliation":[]},{"given":"G.","family":"Spigo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spina","sequence":"additional","affiliation":[]},{"given":"D. 
P.","family":"Spiteri","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spousta","sequence":"additional","affiliation":[]},{"given":"A.","family":"Stabile","sequence":"additional","affiliation":[]},{"given":"R.","family":"Stamen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stamenkovic","sequence":"additional","affiliation":[]},{"given":"E.","family":"Stanecka","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stanislaus","sequence":"additional","affiliation":[]},{"given":"M. M.","family":"Stanitzki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stankaityte","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stapf","sequence":"additional","affiliation":[]},{"given":"E. A.","family":"Starchenko","sequence":"additional","affiliation":[]},{"given":"G. H.","family":"Stark","sequence":"additional","affiliation":[]},{"given":"J.","family":"Stark","sequence":"additional","affiliation":[]},{"given":"P.","family":"Staroba","sequence":"additional","affiliation":[]},{"given":"P.","family":"Starovoitov","sequence":"additional","affiliation":[]},{"given":"S.","family":"St\u00e4rz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Staszewski","sequence":"additional","affiliation":[]},{"given":"G.","family":"Stavropoulos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stegler","sequence":"additional","affiliation":[]},{"given":"P.","family":"Steinberg","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Steinhebel","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stelzer","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Stelzer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Stelzer-Chilton","sequence":"additional","affiliation":[]},{"given":"H.","family":"Stenzel","sequence":"additional","affiliation":[]},{"given":"T. 
J.","family":"Stevenson","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Stewart","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Stockton","sequence":"additional","affiliation":[]},{"given":"G.","family":"Stoicea","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stolarski","sequence":"additional","affiliation":[]},{"given":"S.","family":"Stonjek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Straessner","sequence":"additional","affiliation":[]},{"given":"J.","family":"Strandberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Strandberg","sequence":"additional","affiliation":[]},{"given":"M.","family":"Strauss","sequence":"additional","affiliation":[]},{"given":"P.","family":"Strizenec","sequence":"additional","affiliation":[]},{"given":"R.","family":"Str\u00f6hmer","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Strom","sequence":"additional","affiliation":[]},{"given":"R.","family":"Stroynowski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Strubig","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Stucci","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stugu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Stupak","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Styles","sequence":"additional","affiliation":[]},{"given":"D.","family":"Su","sequence":"additional","affiliation":[]},{"given":"W.","family":"Su","sequence":"additional","affiliation":[]},{"given":"S.","family":"Suchek","sequence":"additional","affiliation":[]},{"given":"V. V.","family":"Sulin","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Sullivan","sequence":"additional","affiliation":[]},{"given":"D. M. 
S.","family":"Sultan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sultansoy","sequence":"additional","affiliation":[]},{"given":"T.","family":"Sumida","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sun","sequence":"additional","affiliation":[]},{"given":"X.","family":"Sun","sequence":"additional","affiliation":[]},{"given":"K.","family":"Suruliz","sequence":"additional","affiliation":[]},{"given":"C. J. E.","family":"Suster","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Sutton","sequence":"additional","affiliation":[]},{"given":"S.","family":"Suzuki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Svatos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Swiatlowski","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Swift","sequence":"additional","affiliation":[]},{"given":"T.","family":"Swirski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sydorenko","sequence":"additional","affiliation":[]},{"given":"I.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"T.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ta","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tackmann","sequence":"additional","affiliation":[]},{"given":"J.","family":"Taenzer","sequence":"additional","affiliation":[]},{"given":"A.","family":"Taffard","sequence":"additional","affiliation":[]},{"given":"R.","family":"Tafirout","sequence":"additional","affiliation":[]},{"given":"H.","family":"Takai","sequence":"additional","affiliation":[]},{"given":"R.","family":"Takashima","sequence":"additional","affiliation":[]},{"given":"K.","family":"Takeda","sequence":"additional","affiliation":[]},{"given":"T.","family":"Takeshita","sequence":"additional","affiliation":[]},{"given":"E. 
P.","family":"Takeva","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Takubo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Talby","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Talyshev","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"Tamir","sequence":"additional","affiliation":[]},{"given":"J.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"R.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tapia Araya","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tapprogge","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tarek Abouelfadl Mohamed","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tarem","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tariq","sequence":"additional","affiliation":[]},{"given":"G.","family":"Tarna","sequence":"additional","affiliation":[]},{"given":"G. F.","family":"Tartarelli","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tasevsky","sequence":"additional","affiliation":[]},{"given":"T.","family":"Tashiro","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tassi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tavares Delgado","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Tayalati","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"G. N.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"W.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Tee","sequence":"additional","affiliation":[]},{"given":"R.","family":"Teixeira De Lima","sequence":"additional","affiliation":[]},{"given":"P.","family":"Teixeira-Dias","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ten Kate","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Teoh","sequence":"additional","affiliation":[]},{"given":"S.","family":"Terada","sequence":"additional","affiliation":[]},{"given":"K.","family":"Terashi","sequence":"additional","affiliation":[]},{"given":"J.","family":"Terron","sequence":"additional","affiliation":[]},{"given":"S.","family":"Terzo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Testa","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Teuscher","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Thais","sequence":"additional","affiliation":[]},{"given":"T.","family":"Theveneaux-Pelzer","sequence":"additional","affiliation":[]},{"given":"F.","family":"Thiele","sequence":"additional","affiliation":[]},{"given":"D. W.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"J. O.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"P. D.","family":"Thompson","sequence":"additional","affiliation":[]},{"given":"L. A.","family":"Thomsen","sequence":"additional","affiliation":[]},{"given":"E.","family":"Thomson","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Thorpe","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Ticse Torres","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tikhomirov","sequence":"additional","affiliation":[]},{"given":"Yu. 
A.","family":"Tikhonov","sequence":"additional","affiliation":[]},{"given":"S.","family":"Timoshenko","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tipton","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tisserant","sequence":"additional","affiliation":[]},{"given":"K.","family":"Todome","sequence":"additional","affiliation":[]},{"given":"S.","family":"Todorova-Nova","sequence":"additional","affiliation":[]},{"given":"S.","family":"Todt","sequence":"additional","affiliation":[]},{"given":"J.","family":"Tojo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tok\u00e1r","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tokushuku","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tolley","sequence":"additional","affiliation":[]},{"given":"K. G.","family":"Tomiwa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tomoto","sequence":"additional","affiliation":[]},{"given":"L.","family":"Tompkins","sequence":"additional","affiliation":[]},{"given":"B.","family":"Tong","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tornambe","sequence":"additional","affiliation":[]},{"given":"E.","family":"Torrence","sequence":"additional","affiliation":[]},{"given":"H.","family":"Torres","sequence":"additional","affiliation":[]},{"given":"E.","family":"Torr\u00f3 Pastor","sequence":"additional","affiliation":[]},{"given":"C.","family":"Tosciri","sequence":"additional","affiliation":[]},{"given":"J.","family":"Toth","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Tovey","sequence":"additional","affiliation":[]},{"given":"A.","family":"Traeet","sequence":"additional","affiliation":[]},{"given":"C. 
J.","family":"Treado","sequence":"additional","affiliation":[]},{"given":"T.","family":"Trefzger","sequence":"additional","affiliation":[]},{"given":"F.","family":"Tresoldi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tricoli","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Trigger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Trincaz-Duvoid","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Trischuk","sequence":"additional","affiliation":[]},{"given":"W.","family":"Trischuk","sequence":"additional","affiliation":[]},{"given":"B.","family":"Trocm\u00e9","sequence":"additional","affiliation":[]},{"given":"A.","family":"Trofymov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Troncon","sequence":"additional","affiliation":[]},{"given":"F.","family":"Trovato","sequence":"additional","affiliation":[]},{"given":"L.","family":"Truong","sequence":"additional","affiliation":[]},{"given":"M.","family":"Trzebinski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Trzupek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Tsai","sequence":"additional","affiliation":[]},{"given":"J. C-L.","family":"Tseng","sequence":"additional","affiliation":[]},{"given":"P. V.","family":"Tsiareshka","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tsirigotis","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tsiskaridze","sequence":"additional","affiliation":[]},{"given":"E. G.","family":"Tskhadadze","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tsopoulou","sequence":"additional","affiliation":[]},{"given":"I. 
I.","family":"Tsukerman","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tsulaia","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tsuno","sequence":"additional","affiliation":[]},{"given":"D.","family":"Tsybychev","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Tu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tudorache","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tudorache","sequence":"additional","affiliation":[]},{"given":"T. T.","family":"Tulbure","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Tuna","sequence":"additional","affiliation":[]},{"given":"S.","family":"Turchikhin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Turgeman","sequence":"additional","affiliation":[]},{"given":"I. Turk","family":"Cakir","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Turner","sequence":"additional","affiliation":[]},{"given":"R.","family":"Turra","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Tuts","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tzamarias","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tzovara","sequence":"additional","affiliation":[]},{"given":"G.","family":"Ucchielli","sequence":"additional","affiliation":[]},{"given":"K.","family":"Uchida","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ukegawa","sequence":"additional","affiliation":[]},{"given":"G.","family":"Unal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Undrus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Unel","sequence":"additional","affiliation":[]},{"given":"F. 
C.","family":"Ungaro","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Unno","sequence":"additional","affiliation":[]},{"given":"K.","family":"Uno","sequence":"additional","affiliation":[]},{"given":"J.","family":"Urban","sequence":"additional","affiliation":[]},{"given":"P.","family":"Urquijo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Usai","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Uysal","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vacek","sequence":"additional","affiliation":[]},{"given":"B.","family":"Vachon","sequence":"additional","affiliation":[]},{"given":"K. O. H.","family":"Vadla","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vaidya","sequence":"additional","affiliation":[]},{"given":"C.","family":"Valderanis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Valdes Santurio","sequence":"additional","affiliation":[]},{"given":"M.","family":"Valente","sequence":"additional","affiliation":[]},{"given":"S.","family":"Valentinetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Valero","sequence":"additional","affiliation":[]},{"given":"L.","family":"Val\u00e9ry","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Vallance","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vallier","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Valls Ferrer","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Van Daalen","sequence":"additional","affiliation":[]},{"given":"P.","family":"Van Gemmeren","sequence":"additional","affiliation":[]},{"given":"I.","family":"Van Vulpen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vanadia","sequence":"additional","affiliation":[]},{"given":"W.","family":"Vandelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vandenbroucke","sequence":"additional","affiliation":[]},{"given":"E. 
R.","family":"Vandewall","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vaniachine","sequence":"additional","affiliation":[]},{"given":"D.","family":"Vannicola","sequence":"additional","affiliation":[]},{"given":"R.","family":"Vari","sequence":"additional","affiliation":[]},{"given":"E. W.","family":"Varnes","sequence":"additional","affiliation":[]},{"given":"C.","family":"Varni","sequence":"additional","affiliation":[]},{"given":"T.","family":"Varol","sequence":"additional","affiliation":[]},{"given":"D.","family":"Varouchas","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Varvell","sequence":"additional","affiliation":[]},{"given":"M. E.","family":"Vasile","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Vasquez","sequence":"additional","affiliation":[]},{"given":"F.","family":"Vazeille","sequence":"additional","affiliation":[]},{"given":"D.","family":"Vazquez Furelos","sequence":"additional","affiliation":[]},{"given":"T.","family":"Vazquez Schroeder","sequence":"additional","affiliation":[]},{"given":"J.","family":"Veatch","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vecchio","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Veen","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Veloce","sequence":"additional","affiliation":[]},{"given":"F.","family":"Veloso","sequence":"additional","affiliation":[]},{"given":"S.","family":"Veneziano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ventura","sequence":"additional","affiliation":[]},{"given":"N.","family":"Venturi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Verbytskyi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vercesi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Verducci","sequence":"additional","affiliation":[]},{"given":"C. 
M.","family":"Vergel Infante","sequence":"additional","affiliation":[]},{"given":"C.","family":"Vergis","sequence":"additional","affiliation":[]},{"given":"W.","family":"Verkerke","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Vermeulen","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Vermeulen","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Vetterli","sequence":"additional","affiliation":[]},{"given":"N.","family":"Viaux Maira","sequence":"additional","affiliation":[]},{"given":"M. Vicente","family":"Barreto Pinto","sequence":"additional","affiliation":[]},{"given":"T.","family":"Vickey","sequence":"additional","affiliation":[]},{"given":"O. E. Vickey","family":"Boeriu","sequence":"additional","affiliation":[]},{"given":"G. H. A.","family":"Viehhauser","sequence":"additional","affiliation":[]},{"given":"L.","family":"Vigani","sequence":"additional","affiliation":[]},{"given":"M.","family":"Villa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Villaplana Perez","sequence":"additional","affiliation":[]},{"given":"E.","family":"Vilucchi","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Vincter","sequence":"additional","affiliation":[]},{"given":"G. S.","family":"Virdee","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vishwakarma","sequence":"additional","affiliation":[]},{"given":"C.","family":"Vittori","sequence":"additional","affiliation":[]},{"given":"I.","family":"Vivarelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vogel","sequence":"additional","affiliation":[]},{"given":"P.","family":"Vokac","sequence":"additional","affiliation":[]},{"given":"S. 
E.","family":"von Buddenbrock","sequence":"additional","affiliation":[]},{"given":"E.","family":"Von Toerne","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vorobel","sequence":"additional","affiliation":[]},{"given":"K.","family":"Vorobev","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vos","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Vossebeld","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vozak","sequence":"additional","affiliation":[]},{"given":"N.","family":"Vranjes","sequence":"additional","affiliation":[]},{"given":"M. Vranjes","family":"Milosavljevic","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vrba","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vreeswijk","sequence":"additional","affiliation":[]},{"given":"R.","family":"Vuillermet","sequence":"additional","affiliation":[]},{"given":"I.","family":"Vukotic","sequence":"additional","affiliation":[]},{"given":"P.","family":"Wagner","sequence":"additional","affiliation":[]},{"given":"W.","family":"Wagner","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wagner-Kuhr","sequence":"additional","affiliation":[]},{"given":"S.","family":"Wahdan","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wahlberg","sequence":"additional","affiliation":[]},{"given":"V. M.","family":"Walbrecht","sequence":"additional","affiliation":[]},{"given":"J.","family":"Walder","sequence":"additional","affiliation":[]},{"given":"R.","family":"Walker","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Walker","sequence":"additional","affiliation":[]},{"given":"W.","family":"Walkowiak","sequence":"additional","affiliation":[]},{"given":"V.","family":"Wallangen","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"A. 
Z.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"F.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"P.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.-J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"W. T.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"W. X.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wanotayaroj","sequence":"additional","affiliation":[]},{"given":"A.","family":"Warburton","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Ward","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Wardrope","sequence":"additional","affiliation":[]},{"given":"N.","family":"Warrack","sequence":"additional","affiliation":[]},{"given":"A.","family":"Washbrook","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Watson","sequence":"additional","affiliation":[]},{"given":"M. F.","family":"Watson","sequence":"additional","affiliation":[]},{"given":"G.","family":"Watts","sequence":"additional","affiliation":[]},{"given":"B. 
M.","family":"Waugh","sequence":"additional","affiliation":[]},{"given":"A. F.","family":"Webb","sequence":"additional","affiliation":[]},{"given":"C.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"M. S.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Weidberg","sequence":"additional","affiliation":[]},{"given":"J.","family":"Weingarten","sequence":"additional","affiliation":[]},{"given":"M.","family":"Weirich","sequence":"additional","affiliation":[]},{"given":"C.","family":"Weiser","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Wells","sequence":"additional","affiliation":[]},{"given":"T.","family":"Wenaus","sequence":"additional","affiliation":[]},{"given":"T.","family":"Wengler","sequence":"additional","affiliation":[]},{"given":"S.","family":"Wenig","sequence":"additional","affiliation":[]},{"given":"N.","family":"Wermes","sequence":"additional","affiliation":[]},{"given":"M. D.","family":"Werner","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wessels","sequence":"additional","affiliation":[]},{"given":"T. D.","family":"Weston","sequence":"additional","affiliation":[]},{"given":"K.","family":"Whalen","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Whallon","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Wharton","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"White","sequence":"additional","affiliation":[]},{"given":"A.","family":"White","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"White","sequence":"additional","affiliation":[]},{"given":"D.","family":"Whiteson","sequence":"additional","affiliation":[]},{"given":"B. 
W.","family":"Whitmore","sequence":"additional","affiliation":[]},{"given":"W.","family":"Wiedenmann","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wiel","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wielers","sequence":"additional","affiliation":[]},{"given":"N.","family":"Wieseotte","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wiglesworth","sequence":"additional","affiliation":[]},{"given":"L. A. M.","family":"Wiik-Fuchs","sequence":"additional","affiliation":[]},{"given":"H. G.","family":"Wilkens","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Wilkins","sequence":"additional","affiliation":[]},{"given":"H. H.","family":"Williams","sequence":"additional","affiliation":[]},{"given":"S.","family":"Williams","sequence":"additional","affiliation":[]},{"given":"C.","family":"Willis","sequence":"additional","affiliation":[]},{"given":"S.","family":"Willocq","sequence":"additional","affiliation":[]},{"given":"I.","family":"Wingerter-Seez","sequence":"additional","affiliation":[]},{"given":"E.","family":"Winkels","sequence":"additional","affiliation":[]},{"given":"F.","family":"Winklmeier","sequence":"additional","affiliation":[]},{"given":"O. J.","family":"Winston","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"Winter","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wittgen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wobisch","sequence":"additional","affiliation":[]},{"given":"A.","family":"Wolf","sequence":"additional","affiliation":[]},{"given":"T. M. H.","family":"Wolf","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wolff","sequence":"additional","affiliation":[]},{"given":"R.","family":"W\u00f6lker","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wollrath","sequence":"additional","affiliation":[]},{"given":"M. 
W.","family":"Wolter","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wolters","sequence":"additional","affiliation":[]},{"given":"V. W. S.","family":"Wong","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Woods","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Worm","sequence":"additional","affiliation":[]},{"given":"B. K.","family":"Wosiek","sequence":"additional","affiliation":[]},{"given":"K. W.","family":"Wo\u017aniak","sequence":"additional","affiliation":[]},{"given":"K.","family":"Wraight","sequence":"additional","affiliation":[]},{"given":"S. L.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Wyatt","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Wynne","sequence":"additional","affiliation":[]},{"given":"S.","family":"Xella","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xi","sequence":"additional","affiliation":[]},{"given":"X.","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"I.","family":"Xiotidis","sequence":"additional","affiliation":[]},{"given":"D.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"L.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"T.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"W.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"B.","family":"Yabsley","sequence":"additional","affiliation":[]},{"given":"S.","family":"Yacoob","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yajima","sequence":"additional","affiliation":[]},{"given":"D. 
P.","family":"Yallup","sequence":"additional","affiliation":[]},{"given":"N.","family":"Yamaguchi","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yamaguchi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Yamamoto","sequence":"additional","affiliation":[]},{"given":"M.","family":"Yamatani","sequence":"additional","affiliation":[]},{"given":"T.","family":"Yamazaki","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yamazaki","sequence":"additional","affiliation":[]},{"given":"J.","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Yan","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"H. T.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"T.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"W-M.","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Y. C.","family":"Yap","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yasu","sequence":"additional","affiliation":[]},{"given":"E.","family":"Yatsenko","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"I.","family":"Yeletskikh","sequence":"additional","affiliation":[]},{"given":"M. 
R.","family":"Yexley","sequence":"additional","affiliation":[]},{"given":"E.","family":"Yigitbasi","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yorita","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yoshihara","sequence":"additional","affiliation":[]},{"given":"C. J. S.","family":"Young","sequence":"additional","affiliation":[]},{"given":"C.","family":"Young","sequence":"additional","affiliation":[]},{"given":"J.","family":"Yu","sequence":"additional","affiliation":[]},{"given":"R.","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"X.","family":"Yue","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zaazoua","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zabinski","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zacharis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Zaffaroni","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Zaitsev","sequence":"additional","affiliation":[]},{"given":"T.","family":"Zakareishvili","sequence":"additional","affiliation":[]},{"given":"N.","family":"Zakharchuk","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zambito","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zanzi","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Zaripovas","sequence":"additional","affiliation":[]},{"given":"S. V.","family":"Zei\u00dfner","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zeitnitz","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zemaityte","sequence":"additional","affiliation":[]},{"given":"J. 
C.","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"O.","family":"Zenin","sequence":"additional","affiliation":[]},{"given":"T.","family":"\u017deni\u0161","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zerwas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zgubi\u010d","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"D. F.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"P.","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"A.","family":"Zhemchugov","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zhong","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"M. 
S.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"N.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"C. G.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhuang","sequence":"additional","affiliation":[]},{"given":"K.","family":"Zhukov","sequence":"additional","affiliation":[]},{"given":"V.","family":"Zhulanov","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zieminska","sequence":"additional","affiliation":[]},{"given":"N. I.","family":"Zimine","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zimmermann","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zinonos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ziolkowski","sequence":"additional","affiliation":[]},{"given":"L.","family":"\u017divkovi\u0107","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zobernig","sequence":"additional","affiliation":[]},{"given":"A.","family":"Zoccoli","sequence":"additional","affiliation":[]},{"given":"K.","family":"Zoch","sequence":"additional","affiliation":[]},{"given":"T. 
G.","family":"Zorbas","sequence":"additional","affiliation":[]},{"given":"R.","family":"Zou","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zwalinski","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,9]]},"reference":[{"key":"1757_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.physletb.2012.08.020","volume":"716","author":"ATLAS Collaboration.","year":"2012","unstructured":"ATLAS Collaboration. Observation of a new particle in the search for the Standard Model Higgs boson with the ATLAS detector at the LHC. Phys. Lett. B 716, 1\u201329 (2012).","journal-title":"Phys. Lett. B"},{"key":"1757_CR2","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1016\/j.physletb.2012.08.021","volume":"716","author":"CMS Collaboration.","year":"2012","unstructured":"CMS Collaboration. Observation of a new boson at a mass of 125\u2009GeV with the CMS experiment at the LHC. Phys. Lett. B 716, 30\u201361 (2012).","journal-title":"Phys. Lett. B"},{"key":"1757_CR3","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1103\/PhysRevLett.38.883","volume":"38","author":"BW Lee","year":"1977","unstructured":"Lee, B. W., Quigg, C. & Thacker, H. B. Strength of weak interactions at very high energies and the Higgs boson mass. Phys. Rev. Lett. 38, 883\u2013885 (1977).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR4","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1016\/0550-3213(85)90580-2","volume":"261","author":"MS Chanowitz","year":"1985","unstructured":"Chanowitz, M. S. & Gaillard, M. K. The TeV physics of strongly interacting W\u2019s and Z\u2019s. Nucl. Phys. B 261, 379\u2013431 (1985).","journal-title":"Nucl. Phys. B"},{"key":"1757_CR5","unstructured":"Szleper, M. The Higgs boson and the physics of WW scattering before and after Higgs discovery. 
Preprint at https:\/\/arxiv.org\/abs\/1412.8367 (2014)."},{"key":"1757_CR6","doi-asserted-by":"publisher","first-page":"081","DOI":"10.1007\/JHEP10(2011)081","volume":"10","author":"R Contino","year":"2011","unstructured":"Contino, R., Pappadopulo, D., Marzocca, D. & Rattazzi, R. On the effect of resonances in composite Higgs phenomenology. J. High Energy Phys. 10, 081 (2011).","journal-title":"J. High Energy Phys."},{"key":"1757_CR7","doi-asserted-by":"publisher","first-page":"034","DOI":"10.1088\/1126-6708\/2002\/07\/034","volume":"07","author":"N Arkani-Hamed","year":"2002","unstructured":"Arkani-Hamed, N., Cohen, A. G., Katz, E. & Nelson, A. E. The littlest Higgs. J. High Energy Phys. 07, 034 (2002).","journal-title":"J. High Energy Phys."},{"key":"1757_CR8","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-013-2704-3","volume":"74","author":"A Djouadi","year":"2014","unstructured":"Djouadi, A. Implications of the Higgs discovery for the MSSM. Eur. Phys. J. C 74, 2704 (2014).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR9","doi-asserted-by":"publisher","first-page":"073005","DOI":"10.1103\/PhysRevD.74.073005","volume":"74","author":"OJP Eboli","year":"2006","unstructured":"Eboli, O. J. P., Gonzalez-Garcia, M. C. & Mizukoshi, J. K. pp\u2009\u2192\u2009jje\u00b1\u03bc\u00b1\u03bd\u03bd and jje\u00b1\u03bc\u2213\u03bd\u03bd at $$O({\\alpha }_{em}^{6})$$ and $$O({\\alpha }_{em}^{6})$$ for the study of the quartic electroweak gauge boson vertex at CERN LHC. Phys. Rev. D 74, 073005 (2006).","journal-title":"Phys. Rev. D"},{"key":"1757_CR10","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/JHEP05(2022)039","volume":"2022","author":"R Bellan","year":"2022","unstructured":"Bellan, R. et al. A sensitivity study of VBS and diboson WW to dimension-6 EFT operators at the LHC. J. High Energy Phys. 2022, 39 (2022).","journal-title":"J. 
High Energy Phys."},{"key":"1757_CR11","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1140\/epjc\/s10052-019-6893-2","volume":"79","author":"R Gomez-Ambrosio","year":"2019","unstructured":"Gomez-Ambrosio, R. Studies of dimension-six EFT effects in vector boson scattering. Eur. Phys. J. C 79, 389 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR12","doi-asserted-by":"publisher","first-page":"161801","DOI":"10.1103\/PhysRevLett.123.161801","volume":"123","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Observation of electroweak production of a same-sign W boson pair in association with two jets in pp collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. Lett. 123, 161801 (2019).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR13","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.physletb.2019.05.012","volume":"793","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Observation of electroweak W\u00b1Z boson pair production in association with two jets in pp collisions at $$\\sqrt{s}=$$ 13 TeV with the ATLAS detector. Phys. Lett. B 793, 469\u2013492 (2019).","journal-title":"Phys. Lett. B"},{"key":"1757_CR14","doi-asserted-by":"publisher","first-page":"081801","DOI":"10.1103\/PhysRevLett.120.081801","volume":"120","author":"CMS Collaboration.","year":"2018","unstructured":"CMS Collaboration. Observation of electroweak production of same-sign W boson pairs in the two jet and two same-sign lepton final state in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Rev. Lett. 120, 081801 (2018).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR15","unstructured":"CMS Collaboration. Measurements of production cross sections of WZ and same-sign WW boson pairs in association with two jets in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Lett. 
B 809, 135710 (2020)."},{"key":"1757_CR16","doi-asserted-by":"publisher","first-page":"135992","DOI":"10.1016\/j.physletb.2020.135992","volume":"812","author":"CMS Collaboration.","year":"2021","unstructured":"CMS Collaboration. Evidence for electroweak production of four charged leptons and two jets in proton\u2013proton collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$. Phys. Lett. B 812, 135992 (2021).","journal-title":"Phys. Lett. B"},{"key":"1757_CR17","doi-asserted-by":"publisher","first-page":"053003","DOI":"10.1103\/PhysRevD.90.053003","volume":"90","author":"C Englert","year":"2014","unstructured":"Englert, C. & Spannowsky, M. Limitations and opportunities of off-shell coupling measurements. Phys. Rev. D 90, 053003 (2014).","journal-title":"Phys. Rev. D"},{"key":"1757_CR18","unstructured":"ATLAS Collaboration. The ATLAS experiment at the CERN Large Hadron Collider. J. Instrum. 3, S08003 (2008)."},{"key":"1757_CR19","unstructured":"ATLAS Collaboration. ATLAS insertable B-layer technical design report, ATLAS-TDR-19. CERN https:\/\/cds.cern.ch\/record\/1291633 (2010)."},{"key":"1757_CR20","doi-asserted-by":"publisher","first-page":"T05008","DOI":"10.1088\/1748-0221\/13\/05\/T05008","volume":"13","author":"B Abbott","year":"2018","unstructured":"Abbott, B. et al. Production and integration of the ATLAS Insertable B-Layer. J. Instrum. 13, T05008 (2018).","journal-title":"J. Instrum."},{"key":"1757_CR21","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-017-4852-3","volume":"77","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Performance of the ATLAS trigger system in 2015. Eur. Phys. J. C 77, 317 (2017).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR22","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/JHEP03(2014)141","volume":"2014","author":"B J\u00e4ger","year":"2014","unstructured":"J\u00e4ger, B., Karlberg, A. & Zanderighi, G. 
Electroweak ZZjj production in the Standard Model and beyond in the POWHEG-BOX V2. J. High Energy Phys. 2014, 141 (2014).","journal-title":"J. High Energy Phys."},{"key":"1757_CR23","doi-asserted-by":"publisher","first-page":"040","DOI":"10.1007\/JHEP04(2015)040","volume":"04","author":"RD Ball","year":"2015","unstructured":"Ball, R. D. et al. Parton distributions for the LHC run II. J. High Energy Phys. 04, 040 (2015).","journal-title":"J. High Energy Phys."},{"key":"1757_CR24","doi-asserted-by":"publisher","first-page":"079","DOI":"10.1007\/JHEP07(2014)079","volume":"07","author":"J Alwall","year":"2014","unstructured":"Alwall, J. et al. The automated computation of tree-level and next-to-leading order differential cross sections, and their matching to parton shower simulations. J. High Energy Phys. 07, 079 (2014).","journal-title":"J. High Energy Phys."},{"key":"1757_CR25","doi-asserted-by":"publisher","first-page":"007","DOI":"10.1088\/1126-6708\/2009\/02\/007","volume":"02","author":"T Gleisberg","year":"2009","unstructured":"Gleisberg, T. et al. Event generation with SHERPA 1.1. J. High Energy Phys. 02, 007 (2009).","journal-title":"J. High Energy Phys."},{"key":"1757_CR26","doi-asserted-by":"publisher","first-page":"082","DOI":"10.1007\/JHEP12(2013)082","volume":"12","author":"N Kauer","year":"2013","unstructured":"Kauer, N. Interference effects for H\u2009\u2192\u2009WW\/ZZ $$\\to \\ell {\\overline{\\nu }}_{\\ell }\\overline{\\ell }{\\nu }_{\\ell }$$ searches in gluon fusion at the LHC. J. High Energy Phys. 12, 082, (2013).","journal-title":"J. High Energy Phys."},{"key":"1757_CR27","doi-asserted-by":"publisher","first-page":"033009","DOI":"10.1103\/PhysRevD.89.033009","volume":"89","author":"J Gao","year":"2014","unstructured":"Gao, J. et al. CT10 next-to-next-to-leading order global analysis of QCD. Phys. Rev. D 89, 033009 (2014).","journal-title":"Phys. Rev. 
D"},{"key":"1757_CR28","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1088\/1126-6708\/2007\/09\/126","volume":"09","author":"S Frixione","year":"2007","unstructured":"Frixione, S., Ridolfi, G. & Nason, P. A positive-weight next-to-leading-order Monte Carlo for heavy flavour hadroproduction. J. High Energy Phys. 09, 126 (2007).","journal-title":"J. High Energy Phys."},{"key":"1757_CR29","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1088\/1126-6708\/2009\/09\/111","volume":"09","author":"S Alioli","year":"2009","unstructured":"Alioli, S., Nason, P., Oleari, C. & Re, E. NLO single-top production matched with shower in POWHEG: s- and t-channel contributions. J. High Energy Phys. 09, 111 (2009).","journal-title":"J. High Energy Phys."},{"key":"1757_CR30","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1007\/JHEP09(2012)130","volume":"09","author":"R Frederix","year":"2012","unstructured":"Frederix, R., Re, E. & Torrielli, P. Single-top t-channel hadroproduction in the four-flavour scheme with POWHEG and aMC@NLO. J. High Energy Phys. 09, 130 (2012).","journal-title":"J. High Energy Phys."},{"key":"1757_CR31","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-011-1547-z","volume":"71","author":"E Re","year":"2011","unstructured":"Re, E. Single-top Wt-channel production matched with parton showers using the POWHEG method. Eur. Phys. J. C 71, 1547 (2011).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR32","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1016\/j.cpc.2008.01.036","volume":"178","author":"T Sj\u00f6strand","year":"2008","unstructured":"Sj\u00f6strand, T., Mrenna, S. & Skands, P. Z. A brief introduction to PYTHIA 8.1. Comput. Phys. Commun. 178, 852\u2013867 (2008).","journal-title":"Comput. Phys. Commun."},{"key":"1757_CR33","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1016\/j.nuclphysb.2012.10.003","volume":"867","author":"RD Ball","year":"2013","unstructured":"Ball, R. D. et al. 
Parton distributions with LHC data. Nucl. Phys. B 867, 244\u2013289 (2013).","journal-title":"Nucl. Phys. B"},{"key":"1757_CR34","unstructured":"ATLAS Collaboration. ATLAS Pythia 8 tunes to 7\u2009TeV data, ATL-PHYS-PUB-2014-021 CERN https:\/\/cds.cern.ch\/record\/1966419 (2014)."},{"key":"1757_CR35","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-010-1429-9","volume":"70","author":"ATLAS Collaboration.","year":"2010","unstructured":"ATLAS Collaboration. The ATLAS simulation infrastructure. Eur. Phys. J. C 70, 823 (2010).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR36","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1016\/S0168-9002(03)01368-8","volume":"506","author":"S Agostinelli","year":"2003","unstructured":"Agostinelli, S. et al. Geant4\u2014a simulation toolkit. Nucl. Instrum. Methods A 506, 250\u2013303 (2003).","journal-title":"Nucl. Instrum. Methods A"},{"key":"1757_CR37","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s41781-021-00062-2","volume":"6","author":"G Aad","year":"2022","unstructured":"Aad, G. et al. Emulating the impact of additional proton-proton interactions in the ATLAS simulation by presampling sets of inelastic Monte Carlo events. Comput. Softw. Big Sci. 6, 3 (2022).","journal-title":"Comput. Softw. Big Sci."},{"key":"1757_CR38","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4120-y","volume":"76","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Muon reconstruction performance of the ATLAS detector in proton\u2013proton collision data at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$. Eur. Phys. J. C 76, 292 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR39","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-019-7140-6","volume":"79","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. 
Electron reconstruction and identification in the ATLAS experiment using the 2015 and 2016 LHC proton\u2013proton collision data at$$\\sqrt{s}$$ = 13\u2009TeV. Eur. Phys. J. C 79, 639 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR40","doi-asserted-by":"publisher","first-page":"063","DOI":"10.1088\/1126-6708\/2008\/04\/063","volume":"04","author":"M Cacciari","year":"2008","unstructured":"Cacciari, M., Salam, G. P. & Soyez, G. The anti-kt jet clustering algorithm. J. High Energy Phys. 04, 063 (2008).","journal-title":"J. High Energy Phys."},{"key":"1757_CR41","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-012-1896-2","volume":"72","author":"M Cacciari","year":"2012","unstructured":"Cacciari, M., Salam, G. P. & Soyez, G. FastJet user manual. Eur. Phys. J. C 72, 1896 (2012).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR42","doi-asserted-by":"publisher","first-page":"072002","DOI":"10.1103\/PhysRevD.96.072002","volume":"96","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Jet energy scale measurements and their systematic uncertainties in proton\u2013proton collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. D 96, 072002 (2017).","journal-title":"Phys. Rev. D"},{"key":"1757_CR43","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4395-z","volume":"76","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Performance of pile-up mitigation techniques for jets in pp collisions at $$\\sqrt{s}=8\\,{{{\\rm{TeV}}}}$$ using the ATLAS detector. Eur. Phys. J. C 76, 581 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR44","doi-asserted-by":"publisher","first-page":"970","DOI":"10.1140\/epjc\/s10052-019-7450-8","volume":"79","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. ATLAS b-jet identification performance and efficiency measurement with $$t\\overline{t}$$ events in pp collisions at $$\\sqrt{s}=13$$ TeV. Eur. 
Phys. J. C 79, 970 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR45","doi-asserted-by":"publisher","first-page":"032003","DOI":"10.1103\/PhysRevD.94.032003","volume":"94","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Search for pair production of gluinos decaying via stop and sbottom in events with b-jets and large missing transverse momentum in pp collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. D 94, 032003 (2016).","journal-title":"Phys. Rev. D"},{"key":"1757_CR46","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-018-6288-9","volume":"78","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. Performance of missing transverse momentum reconstruction with the ATLAS detector using proton\u2013proton collisions at $$\\sqrt{s}$$ = 13 TeV. Eur. Phys. J. C 78, 903 (2018).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR47","unstructured":"ATLAS Collaboration. Object-based missing transverse momentum significance in the ATLAS detector. ATLAS-CONF-2018-038. CERN https:\/\/cds.cern.ch\/record\/2630948 (2018)."},{"key":"1757_CR48","doi-asserted-by":"publisher","first-page":"135341","DOI":"10.1016\/j.physletb.2020.135341","volume":"803","author":"ATLAS Collaboration.","year":"2020","unstructured":"ATLAS Collaboration. Evidence for electroweak production of two jets in association with a em>Z\u03b3 pair in pp collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$ with the ATLAS detector. Phys. Lett. B 803, 135341 (2020).","journal-title":"Phys. Lett. B"},{"key":"1757_CR49","first-page":"031","volume":"04","author":"ATLAS Collaboration.","year":"2014","unstructured":"ATLAS Collaboration. Measurement of the electroweak production of dijets in association with a Z-boson and distributions sensitive to vector boson fusion in proton\u2013proton collisions at $$\\sqrt{s}=8\\,{{{\\rm{TeV}}}}$$ using the ATLAS detector. J. High Energy Phys. 04, 031 (2014).","journal-title":"J. 
High Energy Phys."},{"key":"1757_CR50","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-017-5007-2","volume":"77","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Measurements of electroweak Wjj production and constraints on anomalous gauge couplings with the ATLAS detector. Eur. Phys. J. C 77, 474 (2017).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR51","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1016\/j.physletb.2017.10.040","volume":"775","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Measurement of the cross-section for electroweak production of dijets in association with a Z boson in pp collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$ with the ATLAS detector. Phys. Lett. B 775, 206\u2013228 (2017).","journal-title":"Phys. Lett. B"},{"key":"1757_CR52","doi-asserted-by":"publisher","first-page":"032005","DOI":"10.1103\/PhysRevD.97.032005","volume":"97","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. $$ZZ\\to {\\ell }^{+}{\\ell }^{-}{\\ell }^{{\\,}^{\\prime} +}{\\ell }^{{\\,}^{\\prime} -}$$ cross-section measurements and search for anomalous triple gauge couplings in 13\u2009TeV pp collisions with the ATLAS detector. Phys. Rev. D 97, 032005 (2018).","journal-title":"Phys. Rev. D"},{"key":"1757_CR53","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1016\/j.physletb.2017.11.049","volume":"776","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. Search for an invisibly decaying Higgs boson or dark matter candidates produced in association with a Z boson in pp collisions at $$\\sqrt{s}=$$ 13\u2009TeV with the ATLAS detector. Phys. Lett. B 776, 318\u2013337 (2018).","journal-title":"Phys. Lett. B"},{"key":"1757_CR54","unstructured":"ATLAS Collaboration. Luminosity determination in pp collisions at $$\\sqrt{s}=13$$ TeV using the ATLAS detector at the LHC, ATLAS-CONF-2019-021. 
CERN http:\/\/cds.cern.ch\/record\/2677054 (2019)."},{"key":"1757_CR55","doi-asserted-by":"publisher","first-page":"P07017","DOI":"10.1088\/1748-0221\/13\/07\/P07017","volume":"13","author":"G Avoni","year":"2018","unstructured":"Avoni, G. et al. The new lucid-2 detector for luminosity measurement and monitoring in atlas. J. Instrum. 13, P07017 (2018).","journal-title":"J. Instrum."},{"key":"1757_CR56","doi-asserted-by":"publisher","first-page":"023001","DOI":"10.1088\/0954-3899\/43\/2\/023001","volume":"43","author":"J Butterworth","year":"2016","unstructured":"Butterworth, J. et al. PDF4LHC recommendations for LHC Run II. J. Phys. G 43, 023001 (2016).","journal-title":"J. Phys. G"},{"key":"1757_CR57","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/JHEP10(2019)127","volume":"10","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Measurement of ZZ production in the \u2113\u2113\u03bd\u03bd final state with the ATLAS detector in pp collisions at $$\\sqrt{s}=13$$ TeV. J. High Energy Phys. 10, 127 (2019).","journal-title":"J. High Energy Phys."},{"key":"1757_CR58","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4018-8","volume":"76","author":"J Bellm","year":"2016","unstructured":"Bellm, J. et al. Herwig 7.0\/Herwig++ 3.0 release note. Eur. Phys. J. C 76, 196 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR59","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1140\/epjc\/s10052-008-0798-9","volume":"58","author":"M B\u00e4hr","year":"2008","unstructured":"B\u00e4hr, M. et al. Herwig++ physics and manual. Eur. Phys. J. C 58, 639\u2013707 (2008).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR60","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1214\/aos\/1013203451","volume":"29","author":"JH Friedman","year":"2001","unstructured":"Friedman, J. H. Greedy function approximation: A gradient boosting machine. Ann. Stat. 29, 1189\u20131232 (2001).","journal-title":"Ann. 
Stat."},{"key":"1757_CR61","unstructured":"H\u00f6cker, A. et al. TMVA \u2013 toolkit for multivariate data analysis. Preprint at https:\/\/arxiv.org\/abs\/physics\/0703039 (2007)."},{"key":"1757_CR62","doi-asserted-by":"publisher","first-page":"1554","DOI":"10.1140\/epjc\/s10052-011-1554-0","volume":"71","author":"G Cowan","year":"2011","unstructured":"Cowan, G., Cranmer, K., Gross, E. & Vitells, O. Asymptotic formulae for likelihood-based tests of new physics. Eur. Phys. J. C 71, 1554 (2011).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR63","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.physletb.2017.10.020","volume":"774","author":"CMS Collaboration.","year":"2017","unstructured":"CMS Collaboration. Measurement of vector boson scattering and constraints on anomalous quartic couplings from events with four leptons and two jets in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Lett. B 774, 682\u2013705 (2017).","journal-title":"Phys. Lett. B"},{"key":"1757_CR64","unstructured":"ATLAS Collaboration. ATLAS computing acknowledgements, ATL-SOFT-PUB-2021-003. CERN (2021) https:\/\/cds.cern.ch\/record\/2776662."},{"key":"1757_CR65","doi-asserted-by":"publisher","unstructured":"ATLAS Collaboration. Observation of electroweak production of two jets and a Z-boson pair with the ATLAS detector at the LHC (version 3). 
HEPData https:\/\/doi.org\/10.17182\/hepdata.93015.v3 (2022).","DOI":"10.17182\/hepdata.93015.v3"}],"container-title":["Nature Physics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T15:32:36Z","timestamp":1676043156000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,9]]},"references-count":65,"alternative-id":["1757"],"URL":"http:\/\/dx.doi.org\/10.1038\/s41567-022-01757-y","relation":{},"ISSN":["1745-2473","1745-2481"],"issn-type":[{"value":"1745-2473","type":"print"},{"value":"1745-2481","type":"electronic"}],"subject":["General Physics and Astronomy"],"published":{"date-parts":[[2023,2,9]]},"assertion":[{"value":"27 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 August 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala 
b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index fbf6f72c0..c27cebf65 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -22,6 +22,13 @@ class CrossrefMappingTest { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val mapper = new ObjectMapper() + @Test + def testMissingAuthorParser():Unit = { + val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString + val result = Crossref2Oaf.convert(json) + result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0)) + } + @Test def testFunderRelationshipsMapping(): Unit = { val template = Source From 4c8706efee6496356199e9c0bca5b99e6263c139 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 29 Jan 2024 18:21:36 +0100 Subject: [PATCH 49/57] [orcid-enrichment] change the value of parameters. 
--- .../oozie_app/workflow.xml | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index bab1e55df..a9642d637 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -114,7 +114,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -142,7 +142,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -170,7 +170,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -198,7 +198,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -225,8 +225,8 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - 
--sourcePath${workingDir}/orcid/orcidprop - --outputPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --sourcePath${workingDir}/orcid/targetOrcidAssoc + --outputPath${workingDir}/orcid/mergedOrcidAssoc @@ -247,9 +247,10 @@ eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=4 + --executor-memory=4G --driver-memory=${sparkDriverMemory} + --conf spark.executor.memoryOverhead=5G --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} @@ -259,9 +260,9 @@ --conf spark.speculation=false --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=15000 - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication @@ -291,7 +292,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset @@ -321,7 +322,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct 
--outputPath${outputPath}/otherresearchproduct @@ -351,7 +352,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software From 1a8b609ed27c8ef0e84f0ccb32c243e532591afe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 30 Jan 2024 11:34:16 +0100 Subject: [PATCH 50/57] code formatting --- .../dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java index c71ccb439..6731f2332 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java @@ -30,7 +30,7 @@ public class MoveResult implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - MoveResult.class + MoveResult.class .getResourceAsStream( "/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json")); From f28c63d5ef84ce090aa07e0bf2afe841975cbd39 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 09:44:56 +0200 Subject: [PATCH 51/57] [orcid enrichment] fixed directory cleanup before distcp --- .../eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml index bbd3581c5..3493ecb2f 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml @@ -49,10 +49,10 @@ - - - - + + + + From e8630a6d03231e00d9d6676629f55400887d76ee Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 14:59:06 +0200 Subject: [PATCH 52/57] [graph cleaning] rule out datasources without an officialname --- .../dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 0124e96fc..f01f90fe4 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -312,7 +312,8 @@ public class GraphCleaningFunctions extends CleaningFunctions { } if (value instanceof Datasource) { - // nothing to evaluate here + final Datasource d = (Datasource) value; + return Objects.nonNull(d.getOfficialname()) && StringUtils.isNotBlank(d.getOfficialname().getValue()); } else if (value instanceof Project) { final Project p = (Project) value; return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue()); From 08162902ab91f4c2adb3a160b34c5b9de1c2a97e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 16:43:40 +0200 Subject: [PATCH 53/57] [actiosets] introduced support for the PromoteAction strategy --- .../actionmanager/promote/PromoteAction.java | 39 +++++++++++++++++++ .../PromoteActionPayloadForGraphTableJob.java | 29 ++++++++++---- .../PromoteActionPayloadFunctions.java | 4 +- ...load_for_graph_table_input_parameters.json | 6 +++ .../wf/dataset/oozie_app/workflow.xml | 2 + .../wf/datasource/oozie_app/workflow.xml | 1 + 
.../wf/organization/oozie_app/workflow.xml | 1 + .../oozie_app/workflow.xml | 2 + .../wf/project/oozie_app/workflow.xml | 1 + .../wf/publication/oozie_app/workflow.xml | 2 + .../wf/relation/oozie_app/workflow.xml | 1 + .../wf/software/oozie_app/workflow.xml | 2 + .../PromoteActionPayloadFunctionsTest.java | 4 +- 13 files changed, 84 insertions(+), 10 deletions(-) create mode 100644 dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java new file mode 100644 index 000000000..163a8708e --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024. + * SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package eu.dnetlib.dhp.actionmanager.promote; + +/** Encodes the Actionset promotion strategies */ +public class PromoteAction { + + /** The supported actionset promotion strategies + * + * ENRICH: promotes only records in the actionset matching another record in the + * graph and enriches them applying the given MergeAndGet strategy + * UPSERT: promotes all the records in an actionset, matching records are updated + * using the given MergeAndGet strategy, the non-matching records are inserted as they are. + */ + public enum Strategy { + ENRICH, UPSERT + } + + /** + * Returns the string representation of the join type implementing the given PromoteAction. 
+ * + * @param strategy the strategy to be used to promote the Actionset contents + * @return the join type used to implement the promotion strategy + */ + public static String joinTypeForStrategy(PromoteAction.Strategy strategy) { + switch (strategy) { + case ENRICH: + return "join"; + case UPSERT: + return "full_outer"; + default: + throw new IllegalStateException("unsupported PromoteAction: " + strategy.toString()); + } + } +} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7b024bea8..56cbda4d6 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -67,8 +67,9 @@ public class PromoteActionPayloadForGraphTableJob { String outputGraphTablePath = parser.get("outputGraphTablePath"); logger.info("outputGraphTablePath: {}", outputGraphTablePath); - MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase()); - logger.info("strategy: {}", strategy); + MergeAndGet.Strategy mergeAndGetStrategy = MergeAndGet.Strategy + .valueOf(parser.get("mergeAndGetStrategy").toUpperCase()); + logger.info("mergeAndGetStrategy: {}", mergeAndGetStrategy); Boolean shouldGroupById = Optional .ofNullable(parser.get("shouldGroupById")) @@ -76,6 +77,12 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + PromoteAction.Strategy promoteActionStrategy = Optional + .ofNullable(parser.get("promoteActionStrategy")) + .map(PromoteAction.Strategy::valueOf) + .orElse(PromoteAction.Strategy.UPSERT); + logger.info("promoteActionStrategy: {}", 
promoteActionStrategy); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); @SuppressWarnings("unchecked") @@ -97,7 +104,8 @@ public class PromoteActionPayloadForGraphTableJob { inputGraphTablePath, inputActionPayloadPath, outputGraphTablePath, - strategy, + mergeAndGetStrategy, + promoteActionStrategy, rowClazz, actionPayloadClazz, shouldGroupById); @@ -124,14 +132,16 @@ public class PromoteActionPayloadForGraphTableJob { String inputGraphTablePath, String inputActionPayloadPath, String outputGraphTablePath, - MergeAndGet.Strategy strategy, + MergeAndGet.Strategy mergeAndGetStrategy, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz, Boolean shouldGroupById) { Dataset rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz); Dataset actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz); Dataset result = promoteActionPayloadForGraphTable( - rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById) + rowDS, actionPayloadDS, mergeAndGetStrategy, promoteActionStrategy, rowClazz, actionPayloadClazz, + shouldGroupById) .map((MapFunction) value -> value, Encoders.bean(rowClazz)); saveGraphTable(result, outputGraphTablePath); @@ -183,7 +193,8 @@ public class PromoteActionPayloadForGraphTableJob { private static Dataset promoteActionPayloadForGraphTable( Dataset rowDS, Dataset actionPayloadDS, - MergeAndGet.Strategy strategy, + MergeAndGet.Strategy mergeAndGetStrategy, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz, Boolean shouldGroupById) { @@ -195,8 +206,9 @@ public class PromoteActionPayloadForGraphTableJob { SerializableSupplier> rowIdFn = ModelSupport::idFn; SerializableSupplier> actionPayloadIdFn = ModelSupport::idFn; - SerializableSupplier> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy); - SerializableSupplier> mergeRowsAndGetFn = 
MergeAndGet.functionFor(strategy); + SerializableSupplier> mergeRowWithActionPayloadAndGetFn = MergeAndGet + .functionFor(mergeAndGetStrategy); + SerializableSupplier> mergeRowsAndGetFn = MergeAndGet.functionFor(mergeAndGetStrategy); SerializableSupplier zeroFn = zeroFn(rowClazz); SerializableSupplier> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget; @@ -207,6 +219,7 @@ public class PromoteActionPayloadForGraphTableJob { rowIdFn, actionPayloadIdFn, mergeRowWithActionPayloadAndGetFn, + promoteActionStrategy, rowClazz, actionPayloadClazz); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java index d799c646b..f0b094240 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java @@ -34,6 +34,7 @@ public class PromoteActionPayloadFunctions { * @param rowIdFn Function used to get the id of graph table row * @param actionPayloadIdFn Function used to get id of action payload instance * @param mergeAndGetFn Function used to merge graph table row and action payload instance + * @param promoteActionStrategy the Actionset promotion strategy * @param rowClazz Class of graph table * @param actionPayloadClazz Class of action payload * @param Type of graph table row @@ -46,6 +47,7 @@ public class PromoteActionPayloadFunctions { SerializableSupplier> rowIdFn, SerializableSupplier> actionPayloadIdFn, SerializableSupplier> mergeAndGetFn, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz) { if (!isSubClass(rowClazz, actionPayloadClazz)) { @@ -61,7 +63,7 @@ public class PromoteActionPayloadFunctions { .joinWith( 
actionPayloadWithIdDS, rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")), - "full_outer") + PromoteAction.joinTypeForStrategy(promoteActionStrategy)) .map( (MapFunction, Tuple2>, G>) value -> { Optional rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2); diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json index 00c9404ef..81a7c77d7 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json @@ -41,6 +41,12 @@ "paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET", "paramRequired": true }, + { + "paramName": "pas", + "paramLongName": "promoteActionStrategy", + "paramDescription": "strategy for promoting the actionset contents into the graph tables, ENRICH or UPSERT (default)", + "paramRequired": false + }, { "paramName": "sgid", "paramLongName": "shouldGroupById", diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml index 4f374a75a..5401b45ca 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml @@ -115,6 +115,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Dataset 
--outputGraphTablePath${workingDir}/dataset --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -167,6 +168,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/dataset --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml index c85ba4ac1..f9bd66ae3 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Datasource --outputGraphTablePath${outputGraphRootPath}/datasource --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml index 412cad70b..ebfdeee31 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Organization --outputGraphTablePath${outputGraphRootPath}/organization --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git 
a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml index 7bac760e2..02399ed9b 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml @@ -114,6 +114,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputGraphTablePath${workingDir}/otherresearchproduct --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -166,6 +167,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/otherresearchproduct --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml index daf48e9d7..57c2357b4 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Project --outputGraphTablePath${outputGraphRootPath}/project --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml 
b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml index b76dc82f1..92b114776 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml @@ -115,6 +115,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Publication --outputGraphTablePath${workingDir}/publication --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -167,6 +168,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/publication --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml index d3086dbdc..e9e5f0b45 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml @@ -107,6 +107,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Relation --outputGraphTablePath${outputGraphRootPath}/relation --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml index b5673b18f..1d36ddf94 100644 --- 
a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml @@ -114,6 +114,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Software --outputGraphTablePath${workingDir}/software --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -166,6 +167,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/software --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index cbc1bfaba..777e2fa1c 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -54,7 +54,7 @@ public class PromoteActionPayloadFunctionsTest { RuntimeException.class, () -> PromoteActionPayloadFunctions .joinGraphTableWithActionPayloadAndMerge( - null, null, null, null, null, OafImplSubSub.class, OafImpl.class)); + null, null, null, null, null, null, OafImplSubSub.class, OafImpl.class)); } @Test @@ -104,6 +104,7 @@ public class PromoteActionPayloadFunctionsTest { rowIdFn, actionPayloadIdFn, mergeAndGetFn, + PromoteAction.Strategy.UPSERT, OafImplSubSub.class, OafImplSubSub.class) .collectAsList(); @@ -183,6 +184,7 @@ public class PromoteActionPayloadFunctionsTest { rowIdFn, actionPayloadIdFn, mergeAndGetFn, + PromoteAction.Strategy.UPSERT, OafImplSubSub.class, 
OafImplSub.class) .collectAsList(); From d86b909db29a40517910d02a9e4129357b553a2f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 16:55:36 +0200 Subject: [PATCH 54/57] [actiosets] fixed join type --- .../eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java index 163a8708e..8fb9c8c95 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java @@ -29,7 +29,7 @@ public class PromoteAction { public static String joinTypeForStrategy(PromoteAction.Strategy strategy) { switch (strategy) { case ENRICH: - return "join"; + return "left_outer"; case UPSERT: return "full_outer"; default: From 8dd666aedd179b2c4f503b908f4a8fe5871c1e1e Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 8 Feb 2024 15:12:16 +0100 Subject: [PATCH 55/57] Dedup aliases, created when a dedup in a previous build has been merged in a new dedup, need to be marked as "deletedbyinference", since they are "merged" in the new dedup --- .../dhp/oa/dedup/DedupRecordFactory.java | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index eddfba309..d5b106c81 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -122,22 +122,41 @@ public class DedupRecordFactory { } return 
Stream - .concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) - .map(id -> { - try { - OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity); - res.setId(id); - res.setDataInfo(dataInfo); - res.setLastupdatetimestamp(ts); - return res; - } catch (Exception e) { - throw new RuntimeException(e); - } - }) + .concat( + Stream + .of(agg.getDedupId()) + .map(id -> createDedupOafEntity(id, agg.entity, dataInfo, ts)), + agg.aliases + .stream() + .map(id -> createMergedDedupAliasOafEntity(id, agg.entity, dataInfo, ts))) .iterator(); }, beanEncoder); } + private static OafEntity createDedupOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(base); + res.setId(id); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static OafEntity createMergedDedupAliasOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) { + try { + OafEntity res = createDedupOafEntity(id, base, dataInfo, ts); + DataInfo ds = (DataInfo) BeanUtils.cloneBean(dataInfo); + ds.setDeletedbyinference(true); + res.setDataInfo(ds); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { if (duplicate == null) { From 1416f16b35e299169dcf1cea00ef7bc111e9acbe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 9 Feb 2024 10:19:53 +0100 Subject: [PATCH 56/57] [graph raw] fixed mapping of the original resource type from the Datacite format --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 22 +++++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 32 +++++++++- .../dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml | 59 +++++++++++++++++++ 3 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 08529125c..57e0d2955 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -238,11 +238,23 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { (Element) doc .selectSingleNode( "//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']")) - .map(element -> { - final String resourceTypeURI = element.attributeValue("uri"); - final String resourceTypeAnyURI = element.attributeValue("anyURI"); - final String resourceTypeTxt = element.getText(); - final String resourceTypeGeneral = element.attributeValue("resourceTypeGeneral"); + .map(e -> { + final String resourceTypeURI = Optional + .ofNullable(e.attributeValue("uri")) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeAnyURI = Optional + .ofNullable(e.attributeValue("anyURI")) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeTxt = Optional + .ofNullable(e.getText()) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeGeneral = Optional + .ofNullable(e.attributeValue("resourceTypeGeneral")) + .filter(StringUtils::isNotBlank) + .orElse(null); return ObjectUtils .firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ac0435ce2..1f2952b75 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1171,6 +1171,34 @@ class MappersTest { } + @Test + void test_Zenodo2() throws IOException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo2.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + assertEquals(3, list.size()); + Publication p = cleanup((Publication) list.get(0), vocs); + + assertNotNull(p.getInstance()); + assertEquals(1, p.getInstance().size()); + + final Instance instance = p.getInstance().get(0); + + assertNotNull(instance.getInstanceTypeMapping()); + assertEquals(1, instance.getInstanceTypeMapping().size()); + + Optional coarType = instance + .getInstanceTypeMapping() + .stream() + .filter(itm -> ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName())) + .findFirst(); + + assertTrue(coarType.isPresent()); + assertNotNull(coarType.get().getOriginalType()); + assertNull(coarType.get().getTypeCode()); + assertNull(coarType.get().getTypeLabel()); + } + @Test void testROHub2() throws IOException { final String xml = IOUtils @@ -1229,7 +1257,7 @@ class MappersTest { } @Test - public void testD4ScienceTraining() throws IOException { + void testD4ScienceTraining() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -1240,7 +1268,7 @@ class MappersTest { } @Test - public void testD4ScienceDataset() throws IOException { + void testD4ScienceDataset() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml new file mode 100644 index 000000000..ebe105de8 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml @@ -0,0 +1,59 @@ + + +
+ oai:zenodo.org:1596086 + 2020-01-20T13:50:28Z + openaire + 2024-02-08T11:03:10.994Z + od______2659::036d5555a6688ed00c8d0da97bdece3b + 2024-02-08T11:03:10.994Z + 2024-02-08T11:03:10.994Z +
+ + + https://zenodo.org/record/1596086 + + + + Bonney, T. G. + T. G. + Bonney + + + + Ice Blocks on a Moraine + + Zenodo + 1889 + + 1889-08-22 + + + + 10.1038/040391a0 + + + Creative Commons Zero v1.0 Universal + Open Access + + + n/a + + + 0001 + 1889-08-22 + OPEN + http://creativecommons.org/publicdomain/zero/1.0/legalcode + + + + +
From b3ddbaed58c8b54cfcc5f56f30e909ed9a8b4d7e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 7 Feb 2024 17:02:05 +0100 Subject: [PATCH 57/57] fixed import of ORPs stored on HDFS in the internal graph format (e.g. Datacite) --- .../oa/graph/raw/CopyHdfsOafSparkApplication.scala | 4 ++-- .../graph/raw/CopyHdfsOafSparkApplicationTest.java | 12 ++++++++++++ .../eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala index 533948289..9d7cca7dd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplication.scala @@ -93,8 +93,8 @@ object CopyHdfsOafSparkApplication { hasSource != null && hasTarget != null } else { val hasId = (json \ "id").extractOrElse[String](null) - val resultType = (json \ "resulttype" \ "classid").extractOrElse[String](null) - hasId != null && oafType.equalsIgnoreCase(resultType) + val resultType = (json \ "resulttype" \ "classid").extractOrElse[String]("") + hasId != null && oafType.startsWith(resultType) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java index 85cb551bc..1f5559377 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafSparkApplicationTest.java @@ -59,7 +59,19 @@ public class CopyHdfsOafSparkApplicationTest { .getResourceAsStream( "/eu/dnetlib/dhp/oa/graph/raw/publication_2_unknownProperty.json")), "publication")); + } + @Test + void isOafType_Datacite_ORP() throws IOException { + assertTrue( + CopyHdfsOafSparkApplication + .isOafType( + IOUtils + .toString( + getClass() + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json")), + "otherresearchproduct")); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json new file mode 100644 index 000000000..abf44b49c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datacite_orp.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"id":"50|doi_________::0a2763977bb76876aff2d3b33a874552","originalId":["50|datacite____::0a2763977bb76876aff2d3b33a874552","10.25935/nhb2-wy29"],"pid":[{"value":"10.25935/nhb2-wy29","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2024-01-15T18:00:35+0000","dateoftransformation":"2024
-01-15T18:00:35+0000","extraInfo":null,"oaiprovenance":null,"measures":null,"processingchargeamount":null,"processingchargecurrency":null,"author":[{"fullname":"Louis, Corentin","name":"Corentin","surname":"Louis","rank":1,"pid":[{"value":"https://orcid.org/0000-0002-9552-8822","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[{"value":"Dublin Institute For Advanced Studies","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"fullname":"Zarka, Philippe","name":"Philippe","surname":"Zarka","rank":2,"pid":[{"value":"https://orcid.org/0000-0003-1672-9878","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[{"value":"Laboratory of Space Studies and Instrumentation in Astrophysics","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"fullname":"Cecconi, 
Baptiste","name":"Baptiste","surname":"Cecconi","rank":3,"pid":[{"value":"https://orcid.org/0000-0001-7915-5571","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[{"value":"Laboratory of Space Studies and Instrumentation in Astrophysics","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"fullname":"Kurth, William","name":"William","surname":"Kurth","rank":4,"pid":[{"value":"https://orcid.org/0000-0002-5471-6202","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[{"value":"University of Iowa","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]}],"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"metaResourceType":null,"language":null,"country":null,"subject":[],"title":[{"value":"Catalogue 
of Jupiter radio emissions identified in the Juno/Waves observations","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2021-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2021-01-01","dataInfo":null},"publisher":{"value":"PADC/MASER","dataInfo":null},"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":[{"license":{"value":"https://creativecommons.org/licenses/by/4.0/legalcode","dataInfo":null},"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0022","classname":"Collection","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"instanceTypeMapping":[{"originalType":"Collection","typeCode":null,"typeLabel":null,"vocabularyName":"openaire::coar_resource_types_3_1"}],"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository","dataInfo":null},"url":["https://dx.doi.org/10.25935/nhb2-wy29"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.25935/nhb2-wy29","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2021-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"measures":null,"fulltext":null}],"eoscifguidelines":null,"openAccessColor":null,"publiclyFunded":null,"contactperson":null,"contactgroup":null,"tool":null,"isGreen":null,"isInDiamondJournal":null} \ No newline at end of file