From 616622d2bb90f4a5e43a9da2dc864ef03dab1f00 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 7 Dec 2023 09:59:52 +0100 Subject: [PATCH 01/56] first version of the workflow single step --- .../main/java/eu/dnetlib/dhp/api/Utils.java | 5 + .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 8 +- .../PrepareDatasourceCountryAssociation.java | 2 +- .../SparkCountryPropagationJob.java | 6 + ...kResultToCommunityFromOrganizationJob.java | 6 + .../PrepareResultCommunitySet.java | 2 +- .../SparkResultToCommunityFromProject.java | 6 + .../PrepareResultCommunitySetStep1.java | 18 +- ...parkResultToCommunityThroughSemRelJob.java | 6 + ...t_preparecommunitytoresult_parameters.json | 4 +- .../oozie_app/workflow.xml | 6 +- .../dhp/wf/main/oozie_app/config-default.xml | 30 ++ .../dnetlib/dhp/wf/main/oozie_app/import.txt | 10 + .../dhp/wf/main/oozie_app/workflow.xml | 324 +++++++++++++++ .../bulktag/oozie_app/config-default.xml | 54 +++ .../bulktag/oozie_app/workflow.xml | 66 ++++ .../oozie_app/config-default.xml | 58 +++ .../countrypropagation/oozie_app/workflow.xml | 316 +++++++++++++++ .../input_preparation_parameter.json | 50 +++ .../input_propagation_parameter.json | 62 +++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 93 +++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 369 ++++++++++++++++++ .../oozie_app/config-default.xml | 63 +++ .../projecttoresult/oozie_app/workflow.xml | 94 +++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 88 +++++ .../input_communitytoresult_parameters.json | 28 ++ ...t_preparecommunitytoresult_parameters.json | 28 ++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 90 +++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 305 +++++++++++++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 182 +++++++++ .../oozie_app/config-default.xml | 58 +++ .../oozie_app/workflow.xml | 97 +++++ 38 files changed, 2863 insertions(+), 19 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml create mode 100644 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index d121b8b7e2..bb30f55d6f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -167,4 +167,9 @@ public class Utils implements Serializable { }); return projectMap; } + + public static List getCommunityIdList(String baseURL) throws IOException { + return getValidCommunities(baseURL).stream() + .map(community -> community.getId()).collect(Collectors.toList()); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 5d1b2b38d1..5745515baa 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -151,7 +151,13 @@ public class SparkBulkTagJob { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + e.name()); + .json(outputPath + e.name()); // write the tagging into the working dir for the entity + + readPath(spark, outputPath + e.name(), resultClazz) // copy the tagging into the actual result output path + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); }); }
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index b9f3bff523..b1720d19d1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -66,7 +66,7 @@ public class PrepareDatasourceCountryAssociation { conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); + // removeOutputDir(spark, outputPath); prepareDatasourceCountryAssociation( spark, Arrays.asList(parser.get("whitelist").split(";")),
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index d9f6433a07..2b0dd76281 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -97,6 +97,12 @@ public class SparkCountryPropagationJob { .mode(SaveMode.Overwrite) .json(outputPath); + readPath(spark, outputPath, resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(sourcePath); + } private static MapFunction, R> getCountryMergeFn() {
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index df8ca3805b..9152b1f5a9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -92,6 +92,12 @@ public class SparkResultToCommunityFromOrganizationJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + e.name()); + + readPath(spark, outputPath + e.name(), resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); } });
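[editor's note] The hunks above (and the analogous ones below for the other enrichment jobs) all introduce the same two-step write-back: the enriched records are first materialized under the working/output dir, then read back and written over the original input path, because Spark cannot overwrite a JSON dataset it is still reading. A minimal sketch of the pattern, assuming illustrative names (writeBack, OBJECT_MAPPER are not the project's exact helpers):

    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SaveMode;
    import org.apache.spark.sql.SparkSession;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class WriteBackSketch {

        private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

        // Illustrative helper, not the project's API: persist the enrichment in
        // an intermediate location, then copy it over the original input path.
        public static <R> void writeBack(
            SparkSession spark, Dataset<R> enriched, Class<R> clazz,
            String workingPath, String inputPath) {

            enriched
                .write()
                .mode(SaveMode.Overwrite)
                .option("compression", "gzip")
                .json(workingPath); // step 1: write into the working dir

            spark
                .read()
                .textFile(workingPath)
                .map(
                    (MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz),
                    Encoders.bean(clazz))
                .write()
                .mode(SaveMode.Overwrite)
                .option("compression", "gzip")
                .json(inputPath); // step 2: overwrite the input in place
        }
    }

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index 7fed2606b4..467e11a969 100644 --- 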
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -53,7 +53,7 @@ public class PrepareResultCommunitySet { log.info("outputPath: {}", outputPath); final String baseURL = parser.get("baseURL"); - log.info("baseUEL: {}", baseURL); + log.info("baseURL: {}", baseURL); final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 6e298cf946..5478915848 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -102,6 +102,12 @@ public class SparkResultToCommunityFromProject implements Serializable { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + e.name()); + + readPath(spark, outputPath + e.name(), resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 0c836a3ba9..73c4e2d7c0 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -4,9 +4,11 @@ package eu.dnetlib.dhp.resulttocommunityfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import java.io.IOException; import java.util.Arrays; import java.util.List; +import eu.dnetlib.dhp.api.Utils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; @@ -26,11 +28,6 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class PrepareResultCommunitySetStep1 { private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySetStep1.class); - private static final String COMMUNITY_LIST_XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')" - + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']" - + " and $x//CONFIGURATION/context/param[./@name='status']/text() != 'hidden'" - + " return $x//CONFIGURATION/context/@id/string()"; - /** * associates to each result the set of community contexts they are associated to; associates to each target of a * relation with allowed semantics the set of community context it could possibly inherit from the source of the @@ -88,10 +85,10 @@ public class PrepareResultCommunitySetStep1 { final List allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";")); log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel)); - final String isLookupUrl = parser.get("isLookUpUrl"); - log.info("isLookupUrl: {}", isLookupUrl); + final String baseURL = 
parser.get("baseURL"); + log.info("baseURL: {}", baseURL); - final List communityIdList = getCommunityList(isLookupUrl); + final List communityIdList = getCommunityList(baseURL); log.info("communityIdList: {}", new Gson().toJson(communityIdList)); final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase(); @@ -159,9 +156,8 @@ public class PrepareResultCommunitySetStep1 { .json(outputResultPath); } - public static List getCommunityList(final String isLookupUrl) throws ISLookUpException { - ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - return isLookUp.quickSearchProfile(COMMUNITY_LIST_XQUERY); + public static List getCommunityList(final String baseURL) throws IOException { + return Utils.getCommunityIdList(baseURL); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index f31a262307..bb7ff1fb7b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -100,6 +100,12 @@ public class SparkResultToCommunityThroughSemRelJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); + + readPath(spark, outputPath, resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(inputPath); } private static MapFunction, R> contextUpdaterFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json index 8c99da673c..271db10bb7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json @@ -1,7 +1,7 @@ [ { - "paramName":"is", - "paramLongName":"isLookUpUrl", + "paramName":"bu", + "paramLongName":"baseURL", "paramDescription": "URL of the isLookUp Service", "paramRequired": true }, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml index 81b51443c6..916eb8b7ce 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -9,8 +9,8 @@ the semantic relationships allowed for propagation - isLookUpUrl - the isLookup service endpoint + baseURL + the baseurl for the comminity APIs outputPath @@ -116,7 +116,7 @@ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${workingDir}/preparedInfo/targetCommunityAssoc --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} + --baseURL${baseURL} diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml new file mode 100644 index 0000000000..d262cb6e05 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt new file mode 100644 index 0000000000..b202594148 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt @@ -0,0 +1,10 @@ +## This is a classpath-based import file (this header is required) +orcid_propagation classpath eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app +bulk_tagging classpath eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app +affiliation_inst_repo classpath eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app +entity_semantic_relation classpath eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app +community_organization classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app +result_project classpath eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app +community_project classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app +community_sem_rel classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app +country_propagation classpath eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml new file mode 100644 index 0000000000..1e6736bf44 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -0,0 +1,324 @@ + + + + + sourcePath + the source path + + + allowedsemrelsorcidprop + the semantic relationships allowed for propagation + + + allowedsemrelsresultproject + the allowed semantics + + + allowedsemrelscommunitysemrel + the semantic relationships allowed for propagation + + + datasourceWhitelistForCountryPropagation + the white list + + + allowedtypes + the allowed types + + + outputPath + the output path + + + organizationtoresultcommunitymap + organization community map + + + pathMap + the json path associated to each selection field + + + blacklist + list of datasources in blacklist for the affiliation from instrepo propagation + + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + 
com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('resumeFrom') eq 'BulkTagging'} + ${wf:conf('resumeFrom') eq 'AffiliationInstitutionalRepository'} + ${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'} + ${wf:conf('resumeFrom') eq 'CommunityOrganization'} + ${wf:conf('resumeFrom') eq 'ResultProject'} + ${wf:conf('resumeFrom') eq 'CommunityProject'} + ${wf:conf('resumeFrom') eq 'CommunitySemanticRelation'} + ${wf:conf('resumeFrom') eq 'CountryPropagation'} + + + + + + + + ${wf:appPath()}/orcid_propagation + + + + + sourcePath + ${sourcePath} + + + allowedsemrels + ${allowedsemrelsorcidprop} + + + outputPath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/bulk_tagging + + + + + sourcePath + ${outputPath} + + + baseURL + ${baseURL} + + + pathMap + ${pathMap} + + + + + + + + + + ${wf:appPath()}/affiliation_inst_repo + + + + + sourcePath + ${outputPath} + + + blacklist + ${blacklist} + + + + + + + + + + ${wf:appPath()}/affiliation_semantic_relation + + + + + sourcePath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/community_organization + + + + + sourcePath + ${outputPath} + + + baseURL + ${baseURL} + + + + + + + + + + ${wf:appPath()}/result_project + + + + + sourcePath + ${outputPath} + + + allowedsemrels + ${allowedsemrelsresultproject} + + + + + + + + + + ${wf:appPath()}/community_project + + + + + sourcePath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/community_sem_rel + + + + + sourcePath + ${outputPath} + + + allowedsemrels + ${allowedsemrelscommunitysemrel} + + + baseURL + ${baseURL} + + + + + + + + + + ${wf:appPath()}/country_propagation + + + + + sourcePath + ${outputPath} + + + whitelist + ${datasourceWhitelistForCountryPropagation} + + + allowedtypes + ${allowedtypes} + + + + + + + + + + 
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml new file mode 100644 index 0000000000..fe82ae1940 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml @@ -0,0 +1,54 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml new file mode 100644 index 0000000000..a735e2b0ee --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -0,0 +1,66 @@ + + + + sourcePath + the source path + + + pathMap + the json path associated to each selection field + + + baseURL + The URL to access the community APIs + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn-cluster + cluster + bulkTagging-publication + eu.dnetlib.dhp.bulktag.SparkBulkTagJob + dhp-enrichment-${projectVersion}.jar + + --num-executors=${sparkExecutorNumber} + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/ + --workingPath${workingDir}/bulktag/ + --pathMap${pathMap} + --baseURL${baseURL} + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml new file mode 100644 index 0000000000..2744ea92ba --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml new file mode 100644 index 0000000000..1fbaeb5d55 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -0,0 +1,316 @@ + + + + sourcePath + the source path + + + whitelist + the white list + + + allowedtypes + the allowed types + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + yarn + cluster + PrepareDatasourceCountryAssociation + 
eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath} + --whitelist${whitelist} + --allowedtypes${allowedtypes} + --workingPath${workingDir}/country + + + + + + + + + + + + + + + yarn + cluster + prepareResultCountry-Publication + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/publication + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + prepareResultCountry-Dataset + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/dataset + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + prepareResultCountry-ORP + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/otherresearchproduct + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + yarn + cluster + prepareResultCountry-Software + eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + 
--executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/software + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + + + + + + + + + + yarn + cluster + countryPropagationForPublications + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/publication + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + + yarn + cluster + countryPropagationForDataset + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/dataset + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + + yarn + cluster + countryPropagationForORP + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/otherresearchproduct + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + + yarn + cluster + countryPropagationForSoftware + eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + 
--executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --sourcePath${sourcePath}/software + --workingPath${workingDir}/country + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + + + + + + + + + + \ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json new file mode 100644 index 0000000000..b599373318 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json @@ -0,0 +1,50 @@ +[ + { + "paramName":"gp", + "paramLongName":"graphPath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "path where to store/find the leaves of the organization hierarchy", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "rep", + "paramLongName": "relationPath", + "paramDescription": "the path where to store the selected subset of relations", + "paramRequired": false + }, + { + "paramName": "pop", + "paramLongName": "projectOrganizationPath", + "paramDescription": "path where to store/find the project to organization associations", + "paramRequired": true + } +] \ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json new file mode 100644 index 0000000000..66a7f5b2f5 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json @@ -0,0 +1,62 @@ +[ + { + "paramName":"rep", + "paramLongName":"relationPath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"lp", + "paramLongName":"leavesPath", + "paramDescription": "path where to store/find the leaves of the organization hierarchy", + "paramRequired": false + }, + { + "paramName":"cp", + "paramLongName":"childParentPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"rp", + "paramLongName":"resultOrgPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "wd", + "paramLongName": "workingDir", + "paramDescription": "the path of the working directory", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "it", + "paramLongName": "iterations", + "paramDescription": "the number of iterations to be computed", + "paramRequired": false + }, + { + "paramName": "pop", + "paramLongName": "projectOrganizationPath", + "paramDescription": "path where to store/find the project to organization associations", + "paramRequired": true + } +] \ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml new file mode 100644 index 0000000000..2744ea92ba --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml new file mode 100644 index 0000000000..e3f3c1758d --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,93 @@ + + + + sourcePath + the source path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf 
spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --graphPath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --leavesPath${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath + --projectOrganizationPath${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath + --relationPath${workingDir}/entitiesSemanticRelation/preparedInfo/relation + + + + + + + + yarn + cluster + resultToOrganizationFromSemRel + eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840 + + --relationPath${workingDir}/entitiesSemanticRelation/preparedInfo/relation + --outputPath${sourcePath}/relation + --leavesPath${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath + --projectOrganizationPath${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath + --hive_metastore_uris${hive_metastore_uris} + --workingDir${workingDir}/entitiesSemanticRelation/working + --iterations${iterations} + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml new file mode 100644 index 0000000000..8d2c341057 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml new file mode 100644 index 0000000000..6d800d6e20 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,369 @@ + + + + sourcePath + the source path + + + allowedsemrels + the semantic relationships allowed for propagation + + + outputPath + the output path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/relation + ${nameNode}/${outputPath}/relation + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/organization + ${nameNode}/${outputPath}/organization + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/project + ${nameNode}/${outputPath}/project + + + + + + + + ${jobTracker} + ${nameNode} + ${nameNode}/${sourcePath}/datasource + ${nameNode}/${outputPath}/datasource + + + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-Publications + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-Dataset + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-ORP + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase1-Software + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --allowedsemrels${allowedsemrels} + + + + + + + + + + yarn + cluster + ORCIDPropagation-PreparePhase2 + eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/orcidprop + --outputPath${workingDir}/orcidprop/mergedOrcidAssoc + + + + + + + + + + + + + + + yarn + cluster + ORCIDPropagation-Publication + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + --conf spark.sql.shuffle.partitions=3840 + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${outputPath}/publication + + + + + + + + yarn + cluster + ORCIDPropagation-Dataset + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + 
--executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${outputPath}/dataset + + + + + + + + yarn + cluster + ORCIDPropagation-ORP + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${outputPath}/otherresearchproduct + + + + + + + + yarn + cluster + ORCIDPropagation-Software + eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.speculation=false + --conf spark.hadoop.mapreduce.map.speculative=false + --conf spark.hadoop.mapreduce.reduce.speculative=false + + --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${sourcePath}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${outputPath}/software + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml new file mode 100644 index 0000000000..caf3c60500 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml @@ -0,0 +1,63 @@ + + + jobTracker + yarnRM + + + + nameNode + + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + 
oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml new file mode 100644 index 0000000000..93a2f98be3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -0,0 +1,94 @@ + + + + sourcePath + the source path + + + allowedsemrels + the allowed semantics + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + PrepareProjectResultsAssociation + eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/relation + --allowedsemrels${allowedsemrels} + --hive_metastore_uris${hive_metastore_uris} + --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked + + + + + + + + yarn + cluster + ProjectToResultPropagation + eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --hive_metastore_uris${hive_metastore_uris} + --outputPath${sourcePath}/relation + --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml new file mode 100644 index 0000000000..2744ea92ba --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml new file mode 100644 index 0000000000..8aec530cc9 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -0,0 +1,88 @@ + + + + sourcePath + the source path + + + baseURL + the baseURL from where to reach the community APIs + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Prepare-Community-Result-Organization + eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/relation + --outputPath${workingDir}/communityorganization/preparedInfo/resultCommunityList + --hive_metastore_uris${hive_metastore_uris} + --baseURL${baseURL} + + + + + + + + yarn + cluster + community2resultfromorganization-Publication + eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList + --sourcePath${sourcePath}/ + --outputPath${workingDir}/resulttocommunityfromorganization/ + + + + + + + + \ No newline at end of file diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json new file mode 100644 index 0000000000..0db8085d19 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json @@ -0,0 +1,28 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where the prepared info has been stored", + "paramRequired": true + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json new file mode 100644 index 0000000000..cbc01c2d5a --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json @@ -0,0 +1,28 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "bu", + "paramLongName": "baseURL", + "paramDescription": "the baseURL from where to reach the community APIs", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml new file mode 100644 index 0000000000..2744ea92ba --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4
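<!-- The two parameter files above are consumed through the project's usual CLI plumbing (a minimal sketch, following the pattern visible in the Java classes elsewhere in this patch; the exact resource path the job class loads is an assumption here):

       String jsonConf = IOUtils.toString(
           SparkResultToCommunityFromProject.class.getResourceAsStream(
               "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json"));
       ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConf);
       parser.parseArgument(args);                          // validates paramRequired entries
       String sourcePath = parser.get("sourcePath");        // looked up by paramLongName
       String preparedInfoPath = parser.get("preparedInfoPath");
       Boolean isSparkSessionManaged = Optional
           .ofNullable(parser.get("isSparkSessionManaged"))
           .map(Boolean::valueOf)
           .orElse(Boolean.TRUE);                           // optional parameter

     Each "paramName"/"paramLongName" pair becomes a CLI option of the corresponding Spark action, and a missing required option fails the job at parse time. -->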
+ + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml new file mode 100644 index 0000000000..90ed2e0b6f --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -0,0 +1,90 @@ + + + + sourcePath + the source path + + + baseURL + the base URL to use to select the right community APIs + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + Prepare-Community-Result-Organization + eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/relation + --outputPath${workingDir}/communitythroughproject/preparedInfo/resultCommunityList + --baseURL${baseURL} + + + + + + + + yarn + cluster + community2resultfromproject + eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitythroughproject/preparedInfo/resultCommunityList + --sourcePath${sourcePath}/ + --outputPath${workingDir}/communitythroughproject/ + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml new file mode 100644 index 0000000000..2744ea92ba --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + 
/user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml new file mode 100644 index 0000000000..be88c45bdc --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,305 @@ + + + + sourcePath + the source path + + + allowedsemrels + the semantic relationships allowed for propagation + + + baseURL + the isLookup service endpoint + + + outputPath + the output path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-Publications + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-Dataset + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-ORP + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + yarn + cluster + ResultToCommunitySemRel-PreparePhase1-Software + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --allowedsemrels${allowedsemrels} + --baseURL${baseURL} + + + + + + + + + + yarn + cluster + ResultToCommunityEmRelPropagation-PreparePhase2 + eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --outputPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + + + + + + + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-Publication + eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/publication + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/communitysemrel/publication + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-Dataset + 
eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/dataset + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/communitysemrel/dataset + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-ORP + eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/otherresearchproduct + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/communitysemrel/otherresearchproduct + + + + + + + + + yarn + cluster + Result2CommunitySemRelPropagation-Software + eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc + --sourcePath${sourcePath}/software + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/communitysemrel/software + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml new file mode 100644 index 0000000000..2744ea92ba --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + 
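<!-- For the community-from-semrel workflow above: phase 1 writes one ResultCommunityList dataset per result type under preparedInfo/targetCommunityAssoc, and PrepareResultCommunitySetStep2 folds them into preparedInfo/mergedCommunityAssoc before the four propagation actions run. A hedged sketch of that merge (the Step2 class is not part of this patch, so the exact code may differ):

       readPath(spark, inputPath, ResultCommunityList.class)
           .groupByKey(
               (MapFunction<ResultCommunityList, String>) ResultCommunityList::getResultId,
               Encoders.STRING())
           .mapGroups(
               (MapGroupsFunction<String, ResultCommunityList, ResultCommunityList>) (id, it) -> {
                   ResultCommunityList merged = it.next();
                   Set<String> communities = new HashSet<>(merged.getCommunityList());
                   it.forEachRemaining(r -> communities.addAll(r.getCommunityList()));
                   merged.setCommunityList(new ArrayList<>(communities));
                   return merged; // one de-duplicated association list per result id
               },
               Encoders.bean(ResultCommunityList.class))
           .write()
           .mode(SaveMode.Overwrite)
           .option("compression", "gzip")
           .json(outputPath);
-->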
oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml new file mode 100644 index 0000000000..8281130f37 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -0,0 +1,182 @@ + + + + sourcePath + the source path + + + blacklist + The list of institutional repositories that should not be used for the propagation + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingDir}/affiliationInstRepo + --blacklist${blacklist} + + + + + + + + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForPublications + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/publication + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForDataset + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + 
--executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/dataset + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForORP + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/otherresearchproduct + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + + + + + + + + yarn + cluster + resultToOrganizationFromInstRepoPropagationForSoftware + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${sourcePath}/software + --outputPath${sourcePath}/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked + --hive_metastore_uris${hive_metastore_uris} + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml new file mode 100644 index 0000000000..2744ea92ba --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml 
@@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml new file mode 100644 index 0000000000..7918df120b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,97 @@ + + + + sourcePath + the source path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + PrepareResultOrganizationAssociation + eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --graphPath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath + --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation + + + + + + + + yarn + cluster + resultToOrganizationFromSemRel + eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840 + + --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation + --outputPath${sourcePath} + --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath + 
--hive_metastore_uris${hive_metastore_uris} + --workingDir${workingDir}/affiliationSemanticRelation/working + --iterations${iterations} + + + + + + + + + + + + + + + + + \ No newline at end of file From d4eedada71436a7cae1a5ab154598503b8f36e91 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 9 Dec 2023 15:20:11 +0100 Subject: [PATCH 02/56] adjusting workflow definition --- .../main/java/eu/dnetlib/dhp/api/Utils.java | 6 ++-- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 17 +++++----- .../PrepareDatasourceCountryAssociation.java | 2 +- .../SparkCountryPropagationJob.java | 8 ++--- ...kResultToCommunityFromOrganizationJob.java | 8 ++--- .../SparkResultToCommunityFromProject.java | 8 ++--- .../PrepareResultCommunitySetStep1.java | 2 +- ...parkResultToCommunityThroughSemRelJob.java | 8 ++--- .../eu/dnetlib/dhp/wf/main/job.properties | 15 +++++++++ .../bulktag/oozie_app/workflow.xml | 2 +- .../oozie_app/workflow.xml | 31 +++++++++++++------ 11 files changed, 66 insertions(+), 41 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index bb30f55d6f..06d0f95c25 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -169,7 +169,9 @@ public class Utils implements Serializable { } public static List getCommunityIdList(String baseURL) throws IOException { - return getValidCommunities(baseURL).stream() - .map(community -> community.getId()).collect(Collectors.toList()); + return getValidCommunities(baseURL) + .stream() + .map(community -> community.getId()) + .collect(Collectors.toList()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 5745515baa..51307ccd1e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -105,7 +105,6 @@ public class SparkBulkTagJob { Map>> dsm = cc.getEoscDatasourceMap(); for (String ds : datasources.collectAsList()) { - // final String dsId = ds.substring(3); if (!dsm.containsKey(ds)) { ArrayList> eoscList = new ArrayList<>(); dsm.put(ds, eoscList); @@ -116,13 +115,11 @@ public class SparkBulkTagJob { private static boolean isOKDatasource(Datasource ds) { final String compatibility = ds.getOpenairecompatibility().getClassid(); - boolean isOk = (compatibility.equalsIgnoreCase(OPENAIRE_3) || + return (compatibility.equalsIgnoreCase(OPENAIRE_3) || compatibility.equalsIgnoreCase(OPENAIRE_4) || compatibility.equalsIgnoreCase(OPENAIRE_CRIS) || compatibility.equalsIgnoreCase(OPENAIRE_DATA)) && ds.getCollectedfrom().stream().anyMatch(cf -> cf.getKey().equals(EOSC)); - - return isOk; } private static void execBulkTag( @@ -151,13 +148,13 @@ public class SparkBulkTagJob { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + e.name());//writing the tagging in the working dir for entity + .json(outputPath + e.name());// writing the tagging in the working dir for entity - readPath(spark, outputPath + e.name(), resultClazz) //copy the tagging in the actual result output path - .write() - .mode(SaveMode.Overwrite) - 
.option("compression","gzip") - .json(inputPath + e.name()); + readPath(spark, outputPath + e.name(), resultClazz) // copy the tagging in the actual result output path + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index b1720d19d1..2ffe6f36de 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -66,7 +66,7 @@ public class PrepareDatasourceCountryAssociation { conf, isSparkSessionManaged, spark -> { - //removeOutputDir(spark, outputPath); + // removeOutputDir(spark, outputPath); prepareDatasourceCountryAssociation( spark, Arrays.asList(parser.get("whitelist").split(";")), diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 2b0dd76281..17247f8125 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -98,10 +98,10 @@ public class SparkCountryPropagationJob { .json(outputPath); readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(sourcePath); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(sourcePath); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 9152b1f5a9..adb7feef7e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -94,10 +94,10 @@ public class SparkResultToCommunityFromOrganizationJob { .json(outputPath + e.name()); readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath + e.name()); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 5478915848..229ac7e32e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -104,10 +104,10 @@ public class SparkResultToCommunityFromProject implements Serializable 
{ .json(outputPath + e.name()); readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath + e.name()); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 73c4e2d7c0..40c074a6e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -8,7 +8,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; -import eu.dnetlib.dhp.api.Utils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.sql.*; @@ -17,6 +16,7 @@ import org.slf4j.LoggerFactory; import com.google.gson.Gson; +import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.oaf.Relation; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index bb7ff1fb7b..a107378499 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -102,10 +102,10 @@ public class SparkResultToCommunityThroughSemRelJob { .json(outputPath); readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(inputPath); + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath); } private static MapFunction, R> contextUpdaterFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties new file mode 100644 index 0000000000..6b9b5063fe --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -0,0 +1,15 @@ +sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched +resumeFrom=OrcidPropagation +allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo +allowedsemrelsresultproject=isSupplementedBy;isSupplementTo +allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo +datasourceWhitelistForCountryPropagation=10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14 +allowedtypes=pubsrepository::institutional +outputPath=/tmp/miriam/enrichment_one_step +organizationtoresultcommunitymap={"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|ukri________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|ukri________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|ukri________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|ukri________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|ukri________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|ukri________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], 
"20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"], "20|openorgs____::d11f981828c485cd23d93f7f24f24db1":["eut"], "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a":["eut"], "20|openorgs____::526468206bca24c1c90da6a312295cf4":["eut"], "20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7":["eut"], "20|openorgs____::55a1f889758964b77682904218fdb298":["eut"], "20|openorgs____::530092b6970d60a5329beb9f39e8d7d4":["eut"], "20|openorgs____::aadafa39392b3e200102596a3a4aad9d":["eut"], "20|openorgs____::c3fe999c74fad308132b8a5971367dce":["eut"], "20|openorgs____::1624ff7c01bb641b91f4518539a0c28a":["aurora"], "20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2":["aurora"], "20|openorgs____::818b75030e0e40612d69e049843ede7e":["aurora"], "20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92":["aurora"], "20|openorgs____::ed47496b44722f0e9d7b98898189be0d":["aurora"], "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953":["aurora"], "20|openorgs____::eb391317ed0dc684aa81ac16265de041":["aurora"], "20|openorgs____::f7cfcc98245e22c7d6e321cde930e746":["aurora"], "20|openorgs____::f33179d3306ba2599f7a898b056b604f":["aurora"], "20|pending_org_::75c41e6dd18466709ef359323d96fa05":["aurora"]} +pathMap={"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid":"orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} +blacklist=empty +allowedpids=orcid;orcid_pending +baseURL = https://services.openaire.eu/openaire/community/ + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index a735e2b0ee..307997d4cd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -51,7 +51,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/ - --workingPath${workingDir}/bulktag/ + --outputPath${workingDir}/bulktag/ --pathMap${pathMap} --baseURL${baseURL} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 6d800d6e20..8e945ee5ac 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -80,7 +80,14 @@ - + + + + + + + + @@ -258,6 +265,7 @@ --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication + --hive_metastore_uris${hive_metastore_uris} @@ -288,6 +296,7 @@ --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset + --hive_metastore_uris${hive_metastore_uris} @@ -318,6 +327,7 @@ --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct + 
--hive_metastore_uris${hive_metastore_uris} @@ -348,21 +358,22 @@ --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software + --hive_metastore_uris${hive_metastore_uris} - + - - - - - - - - + + + + + + + + From 8752d275fae9bc7764cd2ee049b6321d44b70528 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 9 Dec 2023 15:24:45 +0100 Subject: [PATCH 03/56] removed not needed parameter --- .../SparkOrcidToResultFromSemRelJob.java | 7 ++++--- .../input_orcidtoresult_parameters.json | 6 ------ .../orcidtoresultfromsemrel/oozie_app/workflow.xml | 4 ---- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index a38b4da2e8..998f4719ad 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -2,7 +2,8 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.List; import java.util.Optional; @@ -65,9 +66,9 @@ public class SparkOrcidToResultFromSemRelJob { Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + + runWithSparkSession( conf, isSparkSessionManaged, spark -> { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json index d8aa7eb9a9..3cbaa23bb6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -11,12 +11,6 @@ "paramDescription": "true if the new version of the graph must be saved", "paramRequired": false }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, { "paramName": "out", "paramLongName": "outputPath", diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 8e945ee5ac..483a805b10 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -265,7 +265,6 @@ --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication - --hive_metastore_uris${hive_metastore_uris} @@ -296,7 +295,6 @@ --sourcePath${sourcePath}/dataset 
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset - --hive_metastore_uris${hive_metastore_uris} @@ -327,7 +325,6 @@ --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} @@ -358,7 +355,6 @@ --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software - --hive_metastore_uris${hive_metastore_uris} From 0d8e496a6317943a28282ffdd0ee5a4d735f61f7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 15 Dec 2023 12:16:43 +0100 Subject: [PATCH 04/56] - --- .../SparkOrcidToResultFromSemRelJob.java | 2 - .../AppendNewRelations.java | 75 +++++++++++++++++++ .../PrepareResultInstRepoAssociation.java | 7 +- ...arkResultToOrganizationFromIstRepoJob.java | 2 +- .../input_prepareresultorg_parameters.json | 13 +--- .../eu/dnetlib/dhp/wf/main/job.properties | 17 ++++- .../oozie_app/workflow.xml | 36 +++++++-- 7 files changed, 131 insertions(+), 21 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 998f4719ad..5f9260e5dc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; - import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.List; @@ -67,7 +66,6 @@ public class SparkOrcidToResultFromSemRelJob { SparkConf conf = new SparkConf(); - runWithSparkSession( conf, isSparkSessionManaged, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java new file mode 100644 index 0000000000..a5884873b1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java @@ -0,0 +1,75 @@ + +package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import java.io.Serializable; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bulktag.community.ResultTagger; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; + +/** + * @author miriam.baglioni + * @Date 09/12/23 + */ +public class AppendNewRelations implements 
Serializable { + + private static final Logger log = LoggerFactory.getLogger(AppendNewRelations.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + AppendNewRelations.class + .getResourceAsStream( + "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> appendNewRelation(spark, inputPath, outputPath)); + } + + private static void appendNewRelation(SparkSession spark, String inputPath, String outputPath) { + + readPath(spark, inputPath + "publication/relation", Relation.class) + .union(readPath(spark, inputPath + "dataset/relation", Relation.class)) + .union(readPath(spark, inputPath + "otherresearchproduct/relation", Relation.class)) + .union(readPath(spark, inputPath + "software/relation", Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 1663afb32e..deec6fedc6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -52,10 +52,13 @@ public class PrepareResultInstRepoAssociation { String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); - final String datasourceOrganizationPath = parser.get("datasourceOrganizationPath"); + final String workingPath = parser.get("workingPath"); + log.info("workingPath : {}", workingPath); + + final String datasourceOrganizationPath = workingPath + "/preparedInfo/datasourceOrganization"; log.info("datasourceOrganizationPath {}: ", datasourceOrganizationPath); - final String alreadyLinkedPath = parser.get("alreadyLinkedPath"); + final String alreadyLinkedPath = workingPath + "/preparedInfo/alreadyLinked"; log.info("alreadyLinkedPath {}: ", alreadyLinkedPath); List blacklist = Optional diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0757ebccd4..bbad20e2d5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -119,7 +119,7 @@ public class SparkResultToOrganizationFromIstRepoJob { "left_outer") .flatMap(createRelationFn(), Encoders.bean(Relation.class)) 
.write() - .mode(SaveMode.Append) + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json index 2f00bacae3..3f4b1d151b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json @@ -11,16 +11,11 @@ "paramDescription": "the hive metastore uris", "paramRequired": true }, + { - "paramName":"dop", - "paramLongName":"datasourceOrganizationPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"alp", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "path where to store/find already linked results and organizations", + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the working path", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 6b9b5063fe..243c1e99d1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=OrcidPropagation +resumeFrom=AffiliationInstitutionalRepository allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo @@ -7,7 +7,20 @@ datasourceWhitelistForCountryPropagation=10|openaire____::3795d6478e30e2c9f787d4 allowedtypes=pubsrepository::institutional outputPath=/tmp/miriam/enrichment_one_step organizationtoresultcommunitymap={"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|ukri________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|ukri________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|ukri________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|ukri________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|ukri________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|ukri________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], 
"20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"], "20|openorgs____::d11f981828c485cd23d93f7f24f24db1":["eut"], "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a":["eut"], "20|openorgs____::526468206bca24c1c90da6a312295cf4":["eut"], "20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7":["eut"], "20|openorgs____::55a1f889758964b77682904218fdb298":["eut"], "20|openorgs____::530092b6970d60a5329beb9f39e8d7d4":["eut"], "20|openorgs____::aadafa39392b3e200102596a3a4aad9d":["eut"], "20|openorgs____::c3fe999c74fad308132b8a5971367dce":["eut"], "20|openorgs____::1624ff7c01bb641b91f4518539a0c28a":["aurora"], "20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2":["aurora"], "20|openorgs____::818b75030e0e40612d69e049843ede7e":["aurora"], "20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92":["aurora"], "20|openorgs____::ed47496b44722f0e9d7b98898189be0d":["aurora"], "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953":["aurora"], "20|openorgs____::eb391317ed0dc684aa81ac16265de041":["aurora"], "20|openorgs____::f7cfcc98245e22c7d6e321cde930e746":["aurora"], "20|openorgs____::f33179d3306ba2599f7a898b056b604f":["aurora"], "20|pending_org_::75c41e6dd18466709ef359323d96fa05":["aurora"]} -pathMap={"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid":"orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} +pathMap ={"author":"$['author'][*]['fullname']", \ + "title":"$['title'][*]['value']",\ + "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ + "orcid_pending":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']" ,\ + "contributor" : "$['contributor'][*]['value']",\ + "description" : "$['description'][*]['value']",\ + "subject" :"$['subject'][*]['value']" , \ + "fos" : "$['subject'][?(@['qualifier']['classid']=='FOS')].value" ,\ + "sdg" : "$['subject'][?(@['qualifier']['classid']=='SDG')].value",\ + "journal":"$['journal'].name",\ + "hostedby":"$['instance'][*]['hostedby']['key']",\ + "collectedfrom":"$['instance'][*]['collectedfrom']['key']",\ + "publisher":"$['publisher'].value",\ + "publicationyear":"$['dateofacceptance'].value"} blacklist=empty allowedpids=orcid;orcid_pending baseURL = https://services.openaire.eu/openaire/community/ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index 8281130f37..dadea2d280 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -47,6 +47,7 @@ --sourcePath${sourcePath} --workingPath${workingDir}/affiliationInstRepo --blacklist${blacklist} + --hive_metastore_uris${hive_metastore_uris} @@ -78,7 +79,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/publication - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/publication/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization 
--alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -107,7 +108,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/dataset - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/dataset/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -136,7 +137,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/otherresearchproduct - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/otherresearchproduct/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -165,7 +166,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/software - --outputPath${sourcePath}/relation + --outputPath${workingDir}/affiliationinstrepo/software/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -175,7 +176,32 @@ - + + + + + yarn + cluster + append new relations + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.AppendNewRelations + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --outputPath${sourcePath}/relation + --sourcePath${workingDir}/affiliationinstrepo/ + + + + From 3eca5d2e1c302a7427ffa735c95ac96a6419caec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 Dec 2023 09:55:27 +0100 Subject: [PATCH 05/56] - --- ...SemRel.java => SparkEntityToOrganizationFromSemRel.java} | 4 ++-- .../dhp/entitytoorganizationfromsemrel/StepActions.java | 5 ++--- .../entitytoorganizationfromsemrel/oozie_app/workflow.xml | 2 +- .../main/resources/eu/dnetlib/dhp/wf/main/job.properties | 2 +- .../resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml | 2 +- .../entitytoorganizationfromsemrel/oozie_app/workflow.xml | 6 +++--- .../dhp/entitytoorganizationfromsemrel/SparkJobTest.java | 6 +++--- 7 files changed, 13 insertions(+), 14 deletions(-) rename dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/{SparkResultToOrganizationFromSemRel.java => SparkEntityToOrganizationFromSemRel.java} (98%) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java similarity index 98% rename from 
dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java index 27e502aba0..87c0ec2b9d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java @@ -27,8 +27,8 @@ import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganization import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; -public class SparkResultToOrganizationFromSemRel implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); +public class SparkEntityToOrganizationFromSemRel implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkEntityToOrganizationFromSemRel.class); private static final int MAX_ITERATION = 5; public static final String NEW_RESULT_RELATION_PATH = "/newResultRelation"; public static final String NEW_PROJECT_RELATION_PATH = "/newProjectRelation"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java index 386ea1a5cd..36a7523c50 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java @@ -3,8 +3,8 @@ package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.readPath; -import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; -import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; import java.io.Serializable; import java.util.*; @@ -20,7 +20,6 @@ import org.jetbrains.annotations.NotNull; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml index 16c8c4e19d..851aabe8b1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -162,7 +162,7 @@ yarn cluster resultToOrganizationFromSemRel - eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + 
eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 243c1e99d1..6085cd2b26 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=AffiliationInstitutionalRepository +resumeFrom=default allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 1e6736bf44..33f8496450 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -120,7 +120,7 @@ ${wf:conf('resumeFrom') eq 'BulkTagging'} ${wf:conf('resumeFrom') eq 'AffiliationInstitutionalRepository'} - ${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'} + ${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'} ${wf:conf('resumeFrom') eq 'CommunityOrganization'} ${wf:conf('resumeFrom') eq 'ResultProject'} ${wf:conf('resumeFrom') eq 'CommunityProject'} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml index e3f3c1758d..dbb22b9948 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -29,7 +29,7 @@ yarn cluster - PrepareResultOrganizationAssociation + PrepareResultProjectOrganizationAssociation eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo dhp-enrichment-${projectVersion}.jar @@ -57,8 +57,8 @@ yarn cluster - resultToOrganizationFromSemRel - eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel + entityToOrganizationFromSemRel + eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java index 517a20cd96..db917658a6 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java @@ -114,7 +114,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + 
SparkEntityToOrganizationFromSemRel .main( new String[] { @@ -395,7 +395,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { @@ -678,7 +678,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { From 9d342a47da489d71c3a739b06943a4f09a1225ee Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 18 Dec 2023 11:48:57 +0100 Subject: [PATCH 06/56] updated the transformation Baseline workflow to include mdstore rollback/commit action --- .../dhp/sx/bio/pubmed/oozie_app/workflow.xml | 69 +++++++++++++++++-- .../ebi/SparkCreateBaselineDataFrame.scala | 18 +++-- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 8915a090bd..30eb414698 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + baselineWorkingPath @@ -9,8 +9,12 @@ The IS lookUp service endopoint - targetPath - The target path + mdStoreOutputId + the identifier of the cleaned MDStore + + + mdStoreManagerURI + the path of the cleaned mdstore skipUpdate @@ -19,12 +23,31 @@ - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreOutputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + yarn @@ -43,16 +66,52 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --workingPath${baselineWorkingPath} - --targetPath${targetPath} + --mdstoreOutputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} --masteryarn --isLookupUrl${isLookupUrl} --hdfsServerUri${nameNode} --skipUpdate${skipUpdate} + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 8ac8b00bfa..639918151b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -2,9 +2,12 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils +import 
eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
+import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion
 import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
 import eu.dnetlib.dhp.sx.bio.pubmed._
+import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile}
 import eu.dnetlib.dhp.utils.ISLookupClientFactory
 import org.apache.commons.io.IOUtils
 import org.apache.hadoop.conf.Configuration
@@ -164,11 +167,15 @@ object SparkCreateBaselineDataFrame {
     val workingPath = parser.get("workingPath")
     log.info("workingPath: {}", workingPath)
-    val targetPath = parser.get("targetPath")
-    log.info("targetPath: {}", targetPath)
+    val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
+    log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion)
+
+    val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
+    val outputBasePath = cleanedMdStoreVersion.getHdfsPath
+    log.info("outputBasePath: {}", outputBasePath)
     val hdfsServerUri = parser.get("hdfsServerUri")
-    log.info("hdfsServerUri: {}", targetPath)
+    log.info("hdfsServerUri: {}", hdfsServerUri)
     val skipUpdate = parser.get("skipUpdate")
     log.info("skipUpdate: {}", skipUpdate)
@@ -216,8 +223,11 @@ object SparkCreateBaselineDataFrame {
         .map(a => PubMedToOaf.convert(a, vocabularies))
         .as[Oaf]
         .filter(p => p != null),
-      targetPath
+      s"$outputBasePath/$MDSTORE_DATA_PATH"
     )
+    val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH")
+    val mdStoreSize = df.count
+    writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH")
   }
 }

From 15fd93a2b66f6829cfac0b1350266664371d1df5 Mon Sep 17 00:00:00 2001
From: Sandro La Bruzzo
Date: Mon, 18 Dec 2023 12:21:55 +0100
Subject: [PATCH 07/56] uploaded input parameters on CreateBaseline WF

---
 .../eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json
index 8dc8a2aaeb..3ba83764df 100644
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json
@@ -2,7 +2,7 @@
 {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
 {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true},
 {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true},
-  {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true},
+  {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true},
 {"paramName":"s", "paramLongName":"skipUpdate", "paramDescription": "skip update ", "paramRequired": false},
 {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true}
]
\ No newline at end of file
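Note on the mdstore handshake wired in by the two patches above: StartTransaction creates a new mdstore version and exposes it as a JSON-serialized MDStoreVersion; the Spark job receives it through --mdstoreOutputVersion, derives the HDFS base path from it, writes the records under MDSTORE_DATA_PATH and the record count under MDSTORE_SIZE_PATH, and only afterwards CommitVersion (or RollBackVersion on failure) finalizes the version. The following is a minimal illustrative sketch of the parsing step, not code from the patch: it assumes the MDStoreVersion payload carries its base location in an "hdfsPath" field (consistent with the getHdfsPath accessor used in the Scala change) and that MDSTORE_DATA_PATH and MDSTORE_SIZE_PATH resolve to the "store" and "size" suffixes.

import com.fasterxml.jackson.databind.ObjectMapper;

public class MdStoreVersionSketch {

	// Hypothetical stand-in for eu.dnetlib.dhp.schema.mdstore.MDStoreVersion:
	// only the single field needed to derive the output paths is modelled here.
	public static class VersionPayload {
		public String hdfsPath;
	}

	public static void main(String[] args) throws Exception {
		// Example of the JSON the workflow would pass via
		// ${wf:actionData('StartTransaction')['mdStoreVersion']} (assumed shape).
		String mdstoreOutputVersion = "{\"hdfsPath\":\"/data/mdstore/md-1234/v0\"}";

		VersionPayload version = new ObjectMapper().readValue(mdstoreOutputVersion, VersionPayload.class);

		// Records go under <hdfsPath>/store and the record count under <hdfsPath>/size
		// (assuming MDSTORE_DATA_PATH = "store" and MDSTORE_SIZE_PATH = "size").
		String dataPath = version.hdfsPath + "/store";
		String sizePath = version.hdfsPath + "/size";

		System.out.println("data: " + dataPath + ", size: " + sizePath);
	}
}

The commit/rollback split matters because a reader of the mdstore only ever sees committed versions: if the Spark stage fails, RollBackVersion discards the half-written version instead of leaving a truncated store visible.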
From d410ea8a4176341cdebaa76179c77b5fdd45c631 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Tue, 19 Dec 2023 12:15:01 +0100
Subject: [PATCH 08/56] added needed parameter

---
 .../AppendNewRelations.java                   |  11 +-
 .../oozie_app/workflow.xml                    | 112 ++----------------
 .../input_newrelation_parameters.json         |  20 ++++
 .../eu/dnetlib/dhp/wf/main/job.properties     |   4 +-
 .../dhp/wf/main/oozie_app/workflow.xml        |  10 +-
 5 files changed, 41 insertions(+), 116 deletions(-)
 create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java
index a5884873b1..636c14b655 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java
@@ -2,26 +2,19 @@
 package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
 import static eu.dnetlib.dhp.PropagationConstant.*;
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.io.Serializable;
-import java.util.Objects;
-import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.bulktag.community.ResultTagger;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.Result;
 /**
  * @author miriam.baglioni
@@ -54,7 +47,7 @@ public class AppendNewRelations implements Serializable {
 		SparkConf conf = new SparkConf();
-		runWithSparkHiveSession(
+		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> appendNewRelation(spark, inputPath, outputPath));

diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml
index 851aabe8b1..d7335d8408 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml
@@ -5,9 +5,10 @@ the source path - outputPath - sets the outputPath + iterations + the number of hops to be done up on the hierarchy + @@ -21,119 +22,26 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - ${wf:conf('resumeFrom') eq 'PrepareInfo'} - - - - - - + + - + - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project -
${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - yarn cluster - PrepareResultOrganizationAssociation + PrepareResultProjectOrganizationAssociation eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo dhp-enrichment-${projectVersion}.jar @@ -161,7 +69,7 @@ yarn cluster - resultToOrganizationFromSemRel + resultProjectToOrganizationFromSemRel eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar @@ -177,7 +85,7 @@ --conf spark.sql.shuffle.partitions=3840 --relationPath${workingDir}/preparedInfo/relation - --outputPath${outputPath}/relation + --outputPath${sourcePath}/relation --leavesPath${workingDir}/preparedInfo/leavesPath --childParentPath${workingDir}/preparedInfo/childParentPath --resultOrgPath${workingDir}/preparedInfo/resultOrgPath diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json new file mode 100644 index 0000000000..5fe92cff13 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + },{ + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "institutional repositories that should not be considered for the propagation", + "paramRequired": false +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 6085cd2b26..93e9e0ab1d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=default +resumeFrom=AffiliationSemanticRelation allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo @@ -24,5 +24,5 @@ pathMap ={"author":"$['author'][*]['fullname']", \ blacklist=empty allowedpids=orcid;orcid_pending baseURL = https://services.openaire.eu/openaire/community/ - +iterations=1 diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 33f8496450..de054b962b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -195,13 +195,13 @@ - + - + - ${wf:appPath()}/affiliation_semantic_relation + ${wf:appPath()}/entity_semantic_relation @@ -209,6 +209,10 @@ sourcePath ${outputPath} + + iterations + ${iterations} + From 4740c808f735193e8975f09e1a6841eb4d9a676f Mon Sep 17 
00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Dec 2023 14:26:54 +0100 Subject: [PATCH 09/56] - --- .../PrepareDatasourceCountryAssociation.java | 2 +- .../bulktag/datasourcemaster_parameters.json | 32 -- .../dhp/bulktag/input_bulkTag_parameters.json | 38 -- .../dhp/bulktag/input_eoscTag_parameters.json | 21 - .../input_eosc_bulkTag_parameters.json | 41 -- .../dhp/bulktag/oozie_app/config-default.xml | 54 --- .../dhp/bulktag/oozie_app/workflow.xml | 120 ------ .../input_countrypropagation_parameters.json | 32 -- .../input_prepareassoc_parameters.json | 32 -- ...input_prepareresultcountry_parameters.json | 38 -- .../oozie_app/config-default.xml | 58 --- .../countrypropagation/oozie_app/workflow.xml | 375 ------------------ .../input_preparation_parameter.json | 50 --- .../input_propagation_parameter.json | 62 --- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 105 ----- .../input_orcidtoresult_parameters.json | 44 -- ...input_prepareorcidtoresult_parameters.json | 38 -- ...nput_prepareorcidtoresult_parameters2.json | 20 - .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 371 ----------------- ...put_prepareprojecttoresult_parameters.json | 33 -- .../input_projecttoresult_parameters.json | 44 -- .../oozie_app/config-default.xml | 63 --- .../projecttoresult/oozie_app/workflow.xml | 184 --------- .../input_communitytoresult_parameters.json | 28 -- ...t_preparecommunitytoresult_parameters.json | 33 -- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 147 ------- .../input_communitytoresult_parameters.json | 28 -- ...t_preparecommunitytoresult_parameters.json | 28 -- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 144 ------- .../input_communitytoresult_parameters.json | 52 --- ..._preparecommunitytoresult2_parameters.json | 20 - ...t_preparecommunitytoresult_parameters.json | 44 -- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 366 ----------------- .../input_newrelation_parameters.json | 20 - .../input_prepareresultorg_parameters.json | 32 -- ...sulaffiliationfrominstrepo_parameters.json | 56 --- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 277 ------------- .../eu/dnetlib/dhp/wf/main/job.properties | 8 +- .../dhp/wf/main/oozie_app/workflow.xml | 6 +- .../bulktag/oozie_app/workflow.xml | 10 +- .../countrypropagation/oozie_app/workflow.xml | 10 +- .../oozie_app/workflow.xml | 10 +- .../projecttoresult/oozie_app/workflow.xml | 12 +- .../oozie_app/workflow.xml | 15 +- .../oozie_app/workflow.xml | 11 +- .../oozie_app/workflow.xml | 68 +++- .../oozie_app/workflow.xml | 21 +- .../oozie_app/config-default.xml | 58 --- .../oozie_app/workflow.xml | 97 ----- .../graph/hostedbymap/oozie_app/download.sh | 2 +- 56 files changed, 127 insertions(+), 3681 deletions(-) delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml delete mode 100644 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json delete mode 100644 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 2ffe6f36de..430c265924 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -90,7 +90,7 @@ public class PrepareDatasourceCountryAssociation { (FilterFunction) ds -> !ds.getDataInfo().getDeletedbyinference() && Optional.ofNullable(ds.getDatasourcetype()).isPresent() && Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() && - (allowedtypes.contains(ds.getDatasourcetype().getClassid()) || + (allowedtypes.contains(ds.getJurisdiction().getClassid()) || whitelist.contains(ds.getId()))); // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json deleted file mode 100644 index 9a2eadaa7d..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ 
-[ - { - "paramName": "p", - "paramLongName": "hdfsPath", - "paramDescription": "the path where storing the sequential file", - "paramRequired": true - }, - { - "paramName": "nn", - "paramLongName": "hdfsNameNode", - "paramDescription": "the name node on hdfs", - "paramRequired": true - }, - { - "paramName": "pgurl", - "paramLongName": "postgresUrl", - "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb", - "paramRequired": true - }, - { - "paramName": "pguser", - "paramLongName": "postgresUser", - "paramDescription": "postgres user", - "paramRequired": false - }, - { - "paramName": "pgpasswd", - "paramLongName": "postgresPassword", - "paramDescription": "postgres password", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json deleted file mode 100644 index ce1a8ecab6..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "pm", - "paramLongName":"pathMap", - "paramDescription": "the json path associated to each selection field", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "tg", - "paramLongName": "taggingConf", - "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. 
Intended to be removed", - "paramRequired": false - }, - { - "paramName": "bu", - "paramLongName": "baseURL", - "paramDescription": "this parameter is to specify the api to be queried (beta or production)", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json deleted file mode 100644 index 4c25fea019..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "wp", - "paramLongName": "workingPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json deleted file mode 100644 index 5aace346d9..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json +++ /dev/null @@ -1,41 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "dmp", - "paramLongName":"datasourceMapPath", - "paramDescription": "the path where the association datasource master has been stored", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "wp", - "paramLongName": "workingPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - - "paramName": "rt", - "paramLongName": "resultType", - "paramDescription": "the result type", - "paramRequired": true - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml deleted file mode 100644 index fe82ae1940..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - 
spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml deleted file mode 100644 index 0d4d1f046d..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - sourcePath - the source path - - - pathMap - the json path associated to each selection field - - - outputPath - the output path - - - baseURL - the community API base URL - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn-cluster - cluster - bulkTagging-result - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/ - --outputPath${outputPath}/ - --pathMap${pathMap} - --baseURL${baseURL} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json deleted file mode 100644 index f217e24582..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json deleted file mode 100644 index a00105f2ba..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "w", - "paramLongName": "whitelist", - "paramDescription": "the datasource having a type different from the allowed ones but that we want to add anyway", - "paramRequired": true - }, - { - "paramName": "at", - "paramLongName": "allowedtypes", - "paramDescription": "the allowed datasource types for country propagation", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json deleted file mode 100644 index 18163d1f96..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"out", - "paramLongName":"outputPath", - "paramDescription": "the output path", - "paramRequired": true - }, - { - "paramName":"w", - "paramLongName":"workingPath", - "paramDescription": "the working path", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92ba..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - 
spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml deleted file mode 100644 index 271ccbf722..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ /dev/null @@ -1,375 +0,0 @@ - - - - sourcePath - the source path - - - whitelist - the white list - - - allowedtypes - the allowed types - - - outputPath - the output path - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - PrepareDatasourceCountryAssociation - eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath} - --whitelist${whitelist} - --allowedtypes${allowedtypes} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - prepareResultCountry-Publication - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/publication - --outputPath${workingDir}/publication - --workingPath${workingDir}/workingP - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - prepareResultCountry-Dataset - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf 
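The PrepareDatasourceCountryAssociation step driven by the workflow above selects the datasources eligible for country propagation from --allowedtypes, plus the ids listed in --whitelist regardless of type. A rough plain-Java sketch of that selection rule; Datasource is a hypothetical minimal view and the type string only an example.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class DatasourceCountryFilter {

    // Hypothetical minimal view of a datasource: its id and its type.
    record Datasource(String id, String type) {}

    // Keeps datasources whose type is allowed, plus those explicitly whitelisted by id,
    // mirroring the --allowedtypes / --whitelist parameters of the prepare step.
    static List<Datasource> select(List<Datasource> all, Set<String> allowedTypes, Set<String> whitelist) {
        return all.stream()
            .filter(d -> allowedTypes.contains(d.type()) || whitelist.contains(d.id()))
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<Datasource> all = List.of(
            new Datasource("ds1", "pubsrepository::institutional"),
            new Datasource("ds2", "website"),
            new Datasource("ds3", "aggregator"));
        // ds1 passes on type, ds3 passes on the whitelist, ds2 is dropped.
        System.out.println(select(all, Set.of("pubsrepository::institutional"), Set.of("ds3")));
    }
}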
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/dataset - --outputPath${workingDir}/dataset - --workingPath${workingDir}/workingD - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - prepareResultCountry-ORP - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/otherresearchproduct - --outputPath${workingDir}/otherresearchproduct - --workingPath${workingDir}/workingO - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - prepareResultCountry-Software - eu.dnetlib.dhp.countrypropagation.PrepareResultCountrySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/software - --outputPath${workingDir}/software - --workingPath${workingDir}/workingS - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - - - yarn - cluster - countryPropagationForPublications - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/publication - --preparedInfoPath${workingDir}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - - - - - - - - yarn - cluster - 
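SparkCountryPropagationJob then merges the prepared country information into each result. A sketch of the merge rule, under the assumption that countries already on the result win over propagated ones and deduplication is by country code; Country is a hypothetical minimal view.

import java.util.*;

public class CountryMerge {

    record Country(String code, String provenance) {}

    // Union of the countries already on the result and those prepared for it,
    // deduplicated by country code; existing entries take precedence.
    static List<Country> merge(List<Country> existing, List<Country> prepared) {
        Map<String, Country> byCode = new LinkedHashMap<>();
        existing.forEach(c -> byCode.put(c.code(), c));
        prepared.forEach(c -> byCode.putIfAbsent(c.code(), c));
        return new ArrayList<>(byCode.values());
    }

    public static void main(String[] args) {
        List<Country> existing = List.of(new Country("NL", "harvested"));
        List<Country> prepared = List.of(new Country("NL", "propagation"), new Country("IT", "propagation"));
        // -> [NL harvested, IT propagation]
        System.out.println(merge(existing, prepared));
    }
}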
countryPropagationForDataset - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/dataset - --preparedInfoPath${workingDir}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - - - - - - - - yarn - cluster - countryPropagationForORP - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/otherresearchproduct - --preparedInfoPath${workingDir}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - - - - - - - - yarn - cluster - countryPropagationForSoftware - eu.dnetlib.dhp.countrypropagation.SparkCountryPropagationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/software - --preparedInfoPath${workingDir}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json deleted file mode 100644 index b599373318..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json +++ /dev/null @@ -1,50 +0,0 @@ -[ - { - "paramName":"gp", - "paramLongName":"graphPath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore 
uris", - "paramRequired": true - }, - { - "paramName":"lp", - "paramLongName":"leavesPath", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"cp", - "paramLongName":"childParentPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"rp", - "paramLongName":"resultOrgPath", - "paramDescription": "path where to store/find already linked results and organizations", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "rep", - "paramLongName": "relationPath", - "paramDescription": "the path where to store the selected subset of relations", - "paramRequired": false - }, - { - "paramName": "pop", - "paramLongName": "projectOrganizationPath", - "paramDescription": "the number of iterations to be computed", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json deleted file mode 100644 index 66a7f5b2f5..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json +++ /dev/null @@ -1,62 +0,0 @@ -[ - { - "paramName":"rep", - "paramLongName":"relationPath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"lp", - "paramLongName":"leavesPath", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"cp", - "paramLongName":"childParentPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"rp", - "paramLongName":"resultOrgPath", - "paramDescription": "path where to store/find already linked results and organizations", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "wd", - "paramLongName": "workingDir", - "paramDescription": "true if it is a test running", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "it", - "paramLongName": "iterations", - "paramDescription": "the number of iterations to be computed", - "paramRequired": false - }, - { - "paramName": "pop", - "paramLongName": "projectOrganizationPath", - "paramDescription": "the number of iterations to be computed", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92ba..0000000000 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index d7335d8408..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,105 +0,0 @@ - - - - sourcePath - the source path - - - iterations - the number of hops to be done up on the hierarchy - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - yarn - cluster - PrepareResultProjectOrganizationAssociation - eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --graphPath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --leavesPath${workingDir}/preparedInfo/leavesPath - --childParentPath${workingDir}/preparedInfo/childParentPath - --resultOrgPath${workingDir}/preparedInfo/resultOrgPath - --projectOrganizationPath${workingDir}/preparedInfo/projectOrganizationPath - --relationPath${workingDir}/preparedInfo/relation - - - - - - - - yarn - cluster - resultProjectToOrganizationFromSemRel - eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=3840 - - --relationPath${workingDir}/preparedInfo/relation - --outputPath${sourcePath}/relation - --leavesPath${workingDir}/preparedInfo/leavesPath - --childParentPath${workingDir}/preparedInfo/childParentPath - 
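The propagation job above appends the newly discovered result-organization links to the graph's relation set, skipping pairs already recorded under resultOrgPath. A sketch of that emission step; the Relation shape and the relClass labels are illustrative, not the exact ones defined by the dhp schema.

import java.util.*;

public class NewAffiliationRelations {

    // Hypothetical minimal relation: source id, target id, relation class.
    record Relation(String source, String target, String relClass) {}

    // Emits relations only for (result, organization) pairs that are not already linked,
    // in both directions, as the job appends them to the graph's relation set.
    static List<Relation> newRelations(Map<String, Set<String>> resultOrgs,
                                       Set<String> alreadyLinked) {
        List<Relation> out = new ArrayList<>();
        resultOrgs.forEach((result, orgs) -> orgs.stream()
            .filter(org -> !alreadyLinked.contains(result + "->" + org))
            .forEach(org -> {
                out.add(new Relation(result, org, "hasAuthorInstitution"));
                out.add(new Relation(org, result, "isAuthorInstitutionOf"));
            }));
        return out;
    }

    public static void main(String[] args) {
        System.out.println(newRelations(
            Map.of("res1", Set.of("org1", "org2")),
            Set.of("res1->org1"))); // only the res1/org2 pair produces new relations
    }
}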
--resultOrgPath${workingDir}/preparedInfo/resultOrgPath - --projectOrganizationPath${workingDir}/preparedInfo/projectOrganizationPath - --hive_metastore_uris${hive_metastore_uris} - --workingDir${workingDir}/working - --iterations${iterations} - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json deleted file mode 100644 index 3cbaa23bb6..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"pu", - "paramLongName":"possibleUpdatesPath", - "paramDescription": "the path the the association resultId orcid author list can be found", - "paramRequired": true - }, - { - "paramName":"test", - "paramLongName":"isTest", - "paramDescription": "true if it is executing a test", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json deleted file mode 100644 index 08648d61a1..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"as", - "paramLongName":"allowedsemrels", - "paramDescription": "the allowed sematinc relations for propagation", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] \ No newline at end of file diff --git 
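The possibleUpdatesPath parameter above points to the prepared association between result ids and ORCID author lists; applying it amounts to matching the result's authors by normalized name and filling in missing ORCIDs. A hypothetical sketch (the matching actually implemented in dhp is more elaborate):

import java.util.*;

public class OrcidEnrichment {

    // Hypothetical minimal author view: display name plus an optional ORCID.
    static class Author {
        String fullname;
        String orcid; // null when not yet known
        Author(String fullname, String orcid) { this.fullname = fullname; this.orcid = orcid; }
    }

    static String normalize(String name) {
        return name.toLowerCase(Locale.ROOT).replaceAll("[^a-z ]", "").trim();
    }

    // possibleUpdates: normalized author name -> ORCID, prepared per result.
    // Authors that already carry an ORCID are left untouched.
    static void enrich(List<Author> authors, Map<String, String> possibleUpdates) {
        for (Author a : authors) {
            if (a.orcid == null) {
                a.orcid = possibleUpdates.get(normalize(a.fullname));
            }
        }
    }

    public static void main(String[] args) {
        List<Author> authors = List.of(new Author("Jane Doe", null));
        // 0000-0002-1825-0097 is ORCID's documented example identifier.
        enrich(authors, Map.of("jane doe", "0000-0002-1825-0097"));
        System.out.println(authors.get(0).orcid);
    }
}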
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json deleted file mode 100644 index 1a67134a6b..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 8d2c341057..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index 5f52c16585..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,371 +0,0 @@ - - - - sourcePath - the source path - - - allowedsemrels - the semantic relationships allowed for propagation - - - outputPath - the output path - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Publications - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - 
dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase1-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - 
--hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc - --allowedsemrels${allowedsemrels} - - - - - - - - - - yarn - cluster - ORCIDPropagation-PreparePhase2 - eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep2 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${workingDir}/preparedInfo/targetOrcidAssoc - --outputPath${workingDir}/preparedInfo/mergedOrcidAssoc - - - - - - - - - - - - - - - yarn - cluster - ORCIDPropagation-Publication - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/publication - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - - - - - - - - yarn - cluster - ORCIDPropagation-Dataset - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/dataset - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - - - - - - - - yarn - cluster - ORCIDPropagation-ORP - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf 
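The PreparePhase2 step above merges the four per-result-type association outputs, all written under the same targetOrcidAssoc path, into one association per result. The essence of that merge, sketched with a hypothetical Assoc shape and ORCID's documented example identifiers:

import java.util.*;
import java.util.stream.Collectors;

public class MergeAssociations {

    record Assoc(String resultId, Set<String> orcids) {}

    // Groups the per-result-type association lists by result id and unions the
    // ORCID sets, which is the essence of the PreparePhase2 merge.
    static Map<String, Set<String>> merge(List<Assoc> partial) {
        return partial.stream().collect(Collectors.toMap(
            Assoc::resultId,
            a -> new HashSet<>(a.orcids()),
            (l, r) -> { l.addAll(r); return l; }));
    }

    public static void main(String[] args) {
        System.out.println(merge(List.of(
            new Assoc("res1", Set.of("0000-0002-1825-0097")),
            new Assoc("res1", Set.of("0000-0001-5109-3700")))));
    }
}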
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - - - - - - - - yarn - cluster - ORCIDPropagation-Software - eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.speculation=false - --conf spark.hadoop.mapreduce.map.speculative=false - --conf spark.hadoop.mapreduce.reduce.speculative=false - - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc - --sourcePath${sourcePath}/software - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json deleted file mode 100644 index a70dbd6a08..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json +++ /dev/null @@ -1,33 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName":"asr", - "paramLongName":"allowedsemrels", - "paramDescription": "the types of the allowed datasources. 
Split by ;", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"pu", - "paramLongName":"potentialUpdatePath", - "paramDescription": "the path of the potential updates ", - "paramRequired": true - }, - { - "paramName":"al", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "the path of the already linked project result_set", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json deleted file mode 100644 index 7f44ba03cd..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"pu", - "paramLongName":"potentialUpdatePath", - "paramDescription": "the path of the potential updates ", - "paramRequired": true - }, - { - "paramName":"al", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "the path of the already linked project result_set", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "test", - "paramLongName": "isTest", - "paramDescription": "true if it is a test running", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml deleted file mode 100644 index caf3c60500..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - jobTracker - yarnRM - - - - nameNode - - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml deleted file mode 100644 index 9e91c06fb3..0000000000 
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml +++ /dev/null @@ -1,184 +0,0 @@ - - - - sourcePath - the source path - - - allowedsemrels - the allowed semantics - - - outputPath - the output path - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - PrepareProjectResultsAssociation - eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/relation - --allowedsemrels${allowedsemrels} - --hive_metastore_uris${hive_metastore_uris} - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - - - - - yarn - cluster - ProjectToResultPropagation - eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --saveGraph${saveGraph} - --hive_metastore_uris${hive_metastore_uris} - --outputPath${outputPath}/relation - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json deleted file mode 100644 index 0db8085d19..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "paramName":"s", - 
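The project-to-result propagation pairs potentialUpdatePath against alreadyLinkedPath so that only genuinely new result-project links are written. A sketch of that set difference, with Link as a hypothetical minimal view:

import java.util.*;
import java.util.stream.Collectors;

public class ProjectToResult {

    record Link(String resultId, String projectId) {}

    // Materialises only the links that are potential updates and not already
    // present in the graph (--potentialUpdatePath minus --alreadyLinkedPath).
    static Set<Link> newLinks(Set<Link> potentialUpdates, Set<Link> alreadyLinked) {
        return potentialUpdates.stream()
            .filter(l -> !alreadyLinked.contains(l))
            .collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        Set<Link> potential = Set.of(new Link("res1", "proj1"), new Link("res1", "proj2"));
        Set<Link> linked = Set.of(new Link("res1", "proj1"));
        System.out.println(newLinks(potential, linked)); // only res1 -> proj2 survives
    }
}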
"paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": true - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json deleted file mode 100644 index 3601db7acc..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json +++ /dev/null @@ -1,33 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "bu", - "paramLongName": "baseURL", - "paramDescription": "the base URL to the community API to use", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92ba..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml deleted file mode 100644 index dfa762ac67..0000000000 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ /dev/null @@ -1,147 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - baseURL - the community API base URL - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - Prepare-Community-Result-Organization - eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/preparedInfo/resultCommunityList - --hive_metastore_uris${hive_metastore_uris} - --baseURL${baseURL} - - - - - - - - yarn - cluster - community2resultfromorganization - eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/ - --outputPath${outputPath}/ - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json deleted file mode 100644 index 0db8085d19..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false 
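PrepareResultCommunitySet above derives, for each result, the union of the communities of its affiliated organizations, with the organization-to-community mapping served by the community API behind --baseURL. A plain-Java sketch under those assumptions; the community ids are only examples.

import java.util.*;
import java.util.stream.Collectors;

public class CommunityFromOrganization {

    // affiliations: result -> organizations it is affiliated with (from the relation set).
    // orgCommunities: organization -> community ids, as served by the community API.
    // The prepared resultCommunityList is the per-result union of the communities
    // of its affiliated organizations.
    static Map<String, Set<String>> resultCommunities(Map<String, Set<String>> affiliations,
                                                      Map<String, Set<String>> orgCommunities) {
        Map<String, Set<String>> out = new HashMap<>();
        affiliations.forEach((result, orgs) -> {
            Set<String> communities = orgs.stream()
                .flatMap(o -> orgCommunities.getOrDefault(o, Set.of()).stream())
                .collect(Collectors.toSet());
            if (!communities.isEmpty()) out.put(result, communities);
        });
        return out;
    }

    public static void main(String[] args) {
        System.out.println(resultCommunities(
            Map.of("res1", Set.of("org1")),
            Map.of("org1", Set.of("dh-ch", "enermaps"))));
    }
}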
otherwise", - "paramRequired": false - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": true - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json deleted file mode 100644 index cbc01c2d5a..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "bu", - "paramLongName": "baseURL", - "paramDescription": "the path used to store temporary output files", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92ba..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml deleted file mode 100644 index 21cc2d8874..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml +++ /dev/null @@ -1,144 +0,0 @@ - - - - sourcePath - the source path - - - - outputPath - the output path - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - 
${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - Prepare-Community-Result-Organization - eu.dnetlib.dhp.resulttocommunityfromproject.PrepareResultCommunitySet - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/preparedInfo/resultCommunityList - --production${production} - - - - - - - - yarn - cluster - community2resultfromproject - eu.dnetlib.dhp.resulttocommunityfromproject.SparkResultToCommunityFromProject - dhp-enrichment-${projectVersion}.jar - - --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList - --sourcePath${sourcePath}/ - --outputPath${outputPath}/ - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json deleted file mode 100644 index a40ce375ee..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json +++ /dev/null @@ -1,52 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "p", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": true - }, - { - 
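Applying a prepared result-community list, here and in the organization-based variant above, comes down to adding community ids to a result's context without duplicating those already present, which keeps re-runs of the enrichment idempotent. A hypothetical minimal sketch:

import java.util.*;

public class CommunityContextTagging {

    // Hypothetical minimal result view: the set of community context ids attached to it.
    static class Result {
        final Set<String> context = new LinkedHashSet<>();
    }

    // Adds the propagated community ids; the set naturally skips ids already present.
    static void tag(Result r, Set<String> communities) {
        r.context.addAll(communities);
    }

    public static void main(String[] args) {
        Result r = new Result();
        r.context.add("dh-ch");
        tag(r, Set.of("dh-ch", "beopen"));
        System.out.println(r.context); // [dh-ch, beopen] - no duplicate entry
    }
}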
"paramName":"test", - "paramLongName":"isTest", - "paramDescription": "true if it is executing a test", - "paramRequired": false - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json deleted file mode 100644 index 3ba3c8e9c7..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json deleted file mode 100644 index 271db10bb7..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "paramName":"bu", - "paramLongName":"baseURL", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"as", - "paramLongName":"allowedsemrels", - "paramDescription": "the allowed semantic relations for propagation", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92ba..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - 
spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index 916eb8b7ce..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,366 +0,0 @@ - - - - sourcePath - the source path - - - allowedsemrels - the semantic relationships allowed for propagation - - - baseURL - the baseurl for the comminity APIs - - - outputPath - the output path - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-Publications - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --baseURL${baseURL} - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-Dataset - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - 
--outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-ORP - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn - cluster - ResultToCommunitySemRel-PreparePhase1-Software - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc - --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - yarn - cluster - ResultToCommunityEmRelPropagation-PreparePhase2 - eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${workingDir}/preparedInfo/targetCommunityAssoc - --outputPath${workingDir}/preparedInfo/mergedCommunityAssoc - - - - - - - - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-Publication - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf 
spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/publication - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - --saveGraph${saveGraph} - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-Dataset - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/dataset - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - --saveGraph${saveGraph} - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-ORP - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - --saveGraph${saveGraph} - - - - - - - - yarn - cluster - Result2CommunitySemRelPropagation-Software - eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc - --sourcePath${sourcePath}/software - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - --saveGraph${saveGraph} - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json deleted file mode 100644 index 5fe92cff13..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - },{ - "paramName": "o", - "paramLongName": "outputPath", - "paramDescription": "institutional repositories that should not be considered for the propagation", - "paramRequired": false -} -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json deleted file mode 100644 index 3f4b1d151b..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - - { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the working path", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - },{ - "paramName": "bl", - "paramLongName": "blacklist", - "paramDescription": "institutional repositories that should not be considered for the propagation", - "paramRequired": false -} -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json deleted file mode 100644 index d2b076c827..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json +++ /dev/null @@ -1,56 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, - { - "paramName":"sg", - "paramLongName":"saveGraph", - "paramDescription": "true if the new version of the graph must be saved", - "paramRequired": false - }, - { - "paramName":"dop", - "paramLongName":"datasourceOrganizationPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"alp", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "path where to 
store/find already linked results and organizations", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "the path where prepared info have been stored", - "paramRequired": false - }, - { - "paramName": "test", - "paramLongName": "isTest", - "paramDescription": "true if it is a test running", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92ba..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml deleted file mode 100644 index edfff8817d..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ /dev/null @@ -1,277 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - sets the outputPath - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - PrepareResultOrganizationAssociation - 
eu.dnetlib.dhp.resulttoorganizationfrominstrepo.PrepareResultInstRepoAssociation - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --blacklist${blacklist} - - - - - - - - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForPublications - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/publication - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForDataset - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/dataset - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForORP - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf 
spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/otherresearchproduct - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - - - - - - - - yarn - cluster - resultToOrganizationFromInstRepoPropagationForSoftware - eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --sourcePath${sourcePath}/software - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - --hive_metastore_uris${hive_metastore_uris} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 93e9e0ab1d..4cb759343c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,12 +1,12 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=AffiliationSemanticRelation +resumeFrom=CountryPropagation allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo -datasourceWhitelistForCountryPropagation=10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14 -allowedtypes=pubsrepository::institutional +datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48 +#allowedtypes=pubsrepository::institutional +allowedtypes=Institutional 
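Note on the job.properties change above: allowedtypes no longer matches the datasource type vocabulary (pubsrepository::institutional) but the jurisdiction classid (Institutional), and the datasource whitelist for country propagation was widened considerably. A sketch of the resulting selection predicate, mirroring the PrepareDatasourceCountryAssociation hunk in the last commit of this section (FilterFunction is Spark's, Datasource is the OAF schema class; allowedtypes and whitelist are the parsed property values):

    // keep non-deleted datasources whose jurisdiction is allowed, or whose id
    // is explicitly whitelisted; the Optional guard on getJurisdiction() keeps
    // datasources without a jurisdiction from throwing an NPE -- they can still
    // qualify through the id whitelist
    FilterFunction<Datasource> keepForCountryPropagation = ds ->
        !ds.getDataInfo().getDeletedbyinference()
            && Optional.ofNullable(ds.getDatasourcetype()).isPresent()
            && Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent()
            && ((Optional.ofNullable(ds.getJurisdiction()).isPresent()
                && allowedtypes.contains(ds.getJurisdiction().getClassid()))
                || whitelist.contains(ds.getId()));
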
outputPath=/tmp/miriam/enrichment_one_step -organizationtoresultcommunitymap={"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|ukri________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|ukri________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|ukri________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|ukri________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|ukri________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], 
"20|ukri________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"], "20|openorgs____::d11f981828c485cd23d93f7f24f24db1":["eut"], "20|openorgs____::e66fe5dd092752e1dd6fd29fc699933a":["eut"], "20|openorgs____::526468206bca24c1c90da6a312295cf4":["eut"], "20|openorgs____::08e311e656e65ccb32e07c66b15b6ff7":["eut"], "20|openorgs____::55a1f889758964b77682904218fdb298":["eut"], "20|openorgs____::530092b6970d60a5329beb9f39e8d7d4":["eut"], "20|openorgs____::aadafa39392b3e200102596a3a4aad9d":["eut"], "20|openorgs____::c3fe999c74fad308132b8a5971367dce":["eut"], "20|openorgs____::1624ff7c01bb641b91f4518539a0c28a":["aurora"], "20|openorgs____::cdda7cfe17c89eb50628ec2eb1f8acd2":["aurora"], "20|openorgs____::818b75030e0e40612d69e049843ede7e":["aurora"], "20|openorgs____::0b0102bae51f4f4ef5ba57fbe1523b92":["aurora"], "20|openorgs____::ed47496b44722f0e9d7b98898189be0d":["aurora"], "20|openorgs____::eb0669daa9efeb898a3090d8aac7c953":["aurora"], "20|openorgs____::eb391317ed0dc684aa81ac16265de041":["aurora"], "20|openorgs____::f7cfcc98245e22c7d6e321cde930e746":["aurora"], "20|openorgs____::f33179d3306ba2599f7a898b056b604f":["aurora"], "20|pending_org_::75c41e6dd18466709ef359323d96fa05":["aurora"]} pathMap ={"author":"$['author'][*]['fullname']", \ "title":"$['title'][*]['value']",\ "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index de054b962b..8e91707b6e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -29,10 +29,6 @@ outputPath the output path - - organizationtoresultcommunitymap - organization community map - pathMap the json path associated to each selection field @@ -315,7 +311,7 @@ allowedtypes - ${allowedtupes} + ${allowedtypes} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index 307997d4cd..6c51634484 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -26,12 +26,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 1fbaeb5d55..933bab7e06 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -25,12 +25,20 @@ - + Action 
failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml index dbb22b9948..05824d209b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -18,12 +18,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index 93a2f98be3..f0db9c777f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -22,13 +22,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + + + + + + + + yarn diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index 8aec530cc9..6aeffb4574 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -21,12 +21,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + yarn @@ -75,9 +84,9 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList + --preparedInfoPath${workingDir}/communityorganization/preparedInfo/resultCommunityList --sourcePath${sourcePath}/ - --outputPath${workingDir}/resulttocommunityfromorganization/ + --outputPath${workingDir}/communityorganization/resulttocommunityfromorganization/ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml index 90ed2e0b6f..dd845064b2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -21,12 +21,19 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml index be88c45bdc..773c7fba76 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -18,13 +18,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + + + + + + + @@ -41,8 +48,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -70,8 +79,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -99,8 +110,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -128,8 +141,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -159,8 +174,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -191,8 +208,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -220,8 +239,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -249,8 +270,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -278,8 +301,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -295,10 +320,11 @@ --outputPath${workingDir}/communitysemrel/software - + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index dadea2d280..e963453da9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -21,12 +21,21 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + @@ -79,7 +88,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/publication - --outputPath${workingDir}/affiliationinstrepo/publication/relation + --outputPath${workingDir}/affiliationInstRepo/publication/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -108,7 +117,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/dataset - --outputPath${workingDir}/affiliationinstrepo/dataset/relation + --outputPath${workingDir}/affiliationInstRepo/dataset/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -137,7 +146,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/otherresearchproduct - 
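Note on the recurring resource override in the resulttocommunityfromsemrel hunks above: every prepare and propagation action is pinned to 6 cores / 5G executors with 3g of memory overhead and 3284 shuffle partitions, instead of inheriting ${sparkExecutorCores} and ${sparkExecutorMemory}. For reference, a sketch of the same tuning in programmatic form (the app name is illustrative; the values are taken from the hunks above):

    import org.apache.spark.SparkConf;

    SparkConf conf = new SparkConf()
        .setAppName("ResultToCommunitySemRel")
        .set("spark.executor.cores", "6")
        .set("spark.executor.memory", "5g")
        .set("spark.executor.memoryOverhead", "3g")   // off-heap/JVM headroom on top of the executor heap
        .set("spark.sql.shuffle.partitions", "3284"); // wide joins across the whole graph

Pinning these values per action keeps the heavy semantic-relation joins stable regardless of the defaults configured for the rest of the enrichment pipeline.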
--outputPath${workingDir}/affiliationinstrepo/otherresearchproduct/relation + --outputPath${workingDir}/affiliationInstRepo/otherresearchproduct/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -166,7 +175,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/software - --outputPath${workingDir}/affiliationinstrepo/software/relation + --outputPath${workingDir}/affiliationInstRepo/software/relation --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} @@ -197,7 +206,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --outputPath${sourcePath}/relation - --sourcePath${workingDir}/affiliationinstrepo/ + --sourcePath${workingDir}/affiliationInstRepo/ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92ba..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index 7918df120b..0000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,97 +0,0 @@ - - - - sourcePath - the source path - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn - cluster - PrepareResultOrganizationAssociation - eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - 
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --graphPath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath - --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath - --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath - --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation - - - - - - - - yarn - cluster - resultToOrganizationFromSemRel - eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=3840 - - --relationPath${workingDir}/affiliationSemanticRelation/preparedInfo/relation - --outputPath${sourcePath} - --leavesPath${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath - --childParentPath${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath - --resultOrgPath${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath - --hive_metastore_uris${hive_metastore_uris} - --workingDir${workingDir}/affiliationSemanticRelation/working - --iterations${iterations} - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh index 35220bd8c9..9877fe7de5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh @@ -1,3 +1,3 @@ #!/bin/bash curl -LSs $1 | hdfs dfs -put - $2/$3 -curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt \ No newline at end of file +#curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt \ No newline at end of file From 5011c4d11a4c3884c99d784ed31a336ba89f8bfc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 Dec 2023 15:57:26 +0100 Subject: [PATCH 10/56] refactoring after compilation --- .../provision/IndexRecordTransformerTest.java | 2 +- .../dhp/oa/provision/XmlIndexingJobTest.java | 29 +++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e07ba1b4ea..e728830554 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -82,7 +82,7 @@ public class IndexRecordTransformerTest { void testPeerReviewed() throws IOException, TransformerException { final XmlRecordFactory
xmlRecordFactory = new XmlRecordFactory(contextMapper, false, - XmlConverterJob.schemaLocation); + XmlConverterJob.schemaLocation); final Publication p = load("publication.json", Publication.class); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index b62acbac34..a3a140cf64 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.io.StringReader; import java.net.URI; @@ -32,8 +34,6 @@ import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import static org.junit.jupiter.api.Assertions.assertEquals; - @ExtendWith(MockitoExtension.class) public class XmlIndexingJobTest extends SolrTest { @@ -110,34 +110,33 @@ public class XmlIndexingJobTest extends SolrTest { QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*")); assertEquals( - nRecord, rsp.getResults().getNumFound(), - "the number of indexed records should be equal to the number of input records"); - + nRecord, rsp.getResults().getNumFound(), + "the number of indexed records should be equal to the number of input records"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isgreen = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isgreen = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having openaccesscolor = bronze"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having openaccesscolor = bronze"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isindiamondjournal = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isindiamondjournal = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having publiclyfunded = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having publiclyfunded = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having peerreviewed = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having peerreviewed = true"); } @Test From 62104790ae63d08946f0e340d67f3182cb469b8d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 21 Dec 2023 12:26:19 +0100 Subject: [PATCH 11/56] added metaresourcetype to the result hive DB view --- .../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 149c326faa..748f77b277 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,10 +1,10 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines 
from ${hiveDbName}.otherresearchproduct o; From 3afd4aa57bb107e35f71108c64c45ada698cf8a7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:27:30 +0100 Subject: [PATCH 12/56] adjustments for country propagation --- .../PrepareDatasourceCountryAssociation.java | 5 ++- .../PrepareResultCountrySet.java | 2 +- .../SparkCountryPropagationJob.java | 2 +- .../PrepareInfo.java | 2 +- .../input_countrypropagation_parameters.json | 32 ++++++++++++++++ .../input_prepareassoc_parameters.json | 32 ++++++++++++++++ ...input_prepareresultcountry_parameters.json | 38 +++++++++++++++++++ .../countrypropagation/oozie_app/workflow.xml | 35 ++++++++++------- 8 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 430c265924..a016509e57 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -45,7 +45,7 @@ public class PrepareDatasourceCountryAssociation { .toString( PrepareDatasourceCountryAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -90,7 +90,8 @@ public class PrepareDatasourceCountryAssociation { (FilterFunction) ds -> !ds.getDataInfo().getDeletedbyinference() && Optional.ofNullable(ds.getDatasourcetype()).isPresent() && Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() && - (allowedtypes.contains(ds.getJurisdiction().getClassid()) || + ((Optional.ofNullable(ds.getJurisdiction()).isPresent() && + allowedtypes.contains(ds.getJurisdiction().getClassid())) || whitelist.contains(ds.getId()))); // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 184d24751b..884aa0e47e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -32,7 +32,7 @@ public class PrepareResultCountrySet { .toString( PrepareResultCountrySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json")); final ArgumentApplicationParser 
parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 17247f8125..92930c18bd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -35,7 +35,7 @@ public class SparkCountryPropagationJob { .toString( SparkCountryPropagationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java index 8d3432f062..bdfdde13bd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java @@ -60,7 +60,7 @@ public class PrepareInfo implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json")); + "/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json new file mode 100644 index 0000000000..d3cde8b747 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json new file mode 100644 index 0000000000..a00105f2ba --- 
/dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "whitelist", + "paramDescription": "the datasource having a type different from the allowed ones but that we want to add anyway", + "paramRequired": true + }, + { + "paramName": "at", + "paramLongName": "allowedtypes", + "paramDescription": "the allowed datasource types for country propagation", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json new file mode 100644 index 0000000000..18163d1f96 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"out", + "paramLongName":"outputPath", + "paramDescription": "the output path", + "paramRequired": true + }, + { + "paramName":"w", + "paramLongName":"workingPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 933bab7e06..81d6dc3dc1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -61,7 +61,7 @@ --sourcePath${sourcePath} --whitelist${whitelist} --allowedtypes${allowedtypes} - --workingPath${workingDir}/country + --outputPath${workingDir}/preparedInfo @@ -95,8 +95,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --workingPath${workingDir}/country + --outputPath${workingDir}/publication + --workingPath${workingDir}/workingP --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + 
--preparedInfoPath${workingDir}/preparedInfo @@ -123,8 +125,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --workingPath${workingDir}/country + --outputPath${workingDir}/dataset + --workingPath${workingDir}/workingD --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --preparedInfoPath${workingDir}/preparedInfo @@ -151,8 +155,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --workingPath${workingDir}/country + --outputPath${workingDir}/otherresearchproduct + --workingPath${workingDir}/workingO --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --preparedInfoPath${workingDir}/preparedInfo @@ -179,14 +185,16 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --workingPath${workingDir}/country + --outputPath${workingDir}/software + --workingPath${workingDir}/workingS --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --preparedInfoPath${workingDir}/preparedInfo - + @@ -216,9 +224,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - + --outputPath${workingDir}/country/publication @@ -245,9 +253,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - + --outputPath${workingDir}/country/dataset @@ -274,9 +282,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - + --outputPath${workingDir}/country/otherresearchproduct @@ -303,8 +311,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --workingPath${workingDir}/country + --preparedInfoPath${workingDir}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/country/software From b06aea0adfe716fede41a6fd38e847dc90dd4692 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:35:37 +0100 Subject: [PATCH 13/56] adding the bulkTag parameter file in the folder for the oozie workflow for bulkTagging. 
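A note on the mechanism these patches keep exercising: every job in dhp-enrichment loads its parameter specification as a classpath resource and hands it to ArgumentApplicationParser, so relocating the JSON files under /eu/dnetlib/dhp/wf/subworkflows/... always requires the matching getResourceAsStream change in the corresponding class. A minimal sketch of the consuming side follows; the parser.get lookup by paramLongName is assumed from the surrounding codebase, and the wrapper class is purely illustrative:

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class ParameterLoadingSketch {
	public static void main(String[] args) throws Exception {
		// read the JSON spec bundled in the jar; a stale path fails here at runtime,
		// which is why every resource relocation patch also touches the Java class
		String jsonConfiguration = IOUtils
			.toString(
				ParameterLoadingSketch.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json"));
		ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args); // validates the paramRequired entries
		String sourcePath = parser.get("sourcePath"); // lookup by paramLongName
		String outputPath = parser.get("outputPath");
		System.out.println(sourcePath + " -> " + outputPath);
	}
}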
Changes the path in the class --- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 2 +- .../bulktag/input_bulkTag_parameters.json | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 51307ccd1e..e20fcb081a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -45,7 +45,7 @@ public class SparkBulkTagJob { .toString( SparkBulkTagJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json new file mode 100644 index 0000000000..ce1a8ecab6 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "pm", + "paramLongName":"pathMap", + "paramDescription": "the json path associated to each selection field", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "tg", + "paramLongName": "taggingConf", + "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. 
Intended to be removed", + "paramRequired": false + }, + { + "paramName": "bu", + "paramLongName": "baseURL", + "paramDescription": "this parameter is to specify the api to be queried (beta or production)", + "paramRequired": false + } +] \ No newline at end of file From 89f269c7f4b63070358724213b5d39fac0678916 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:37:50 +0100 Subject: [PATCH 14/56] changed the path to the parameter file in the class for entitytoorganization propagation --- .../SparkEntityToOrganizationFromSemRel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java index 87c0ec2b9d..4e30a6d6a8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java @@ -39,7 +39,7 @@ public class SparkEntityToOrganizationFromSemRel implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json")); + "/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); From 009730b3d1616fa3337cad380b9ff8e55641c9a5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:42:09 +0100 Subject: [PATCH 15/56] added properties file in the forlder for the workflow of orcid propagation. 
--- .../PrepareResultOrcidAssociationStep1.java | 2 +- .../PrepareResultOrcidAssociationStep2.java | 2 +- .../SparkOrcidToResultFromSemRelJob.java | 2 +- .../input_orcidtoresult_parameters.json | 44 +++++++++++++++++++ ...input_prepareorcidtoresult_parameters.json | 38 ++++++++++++++++ ...nput_prepareorcidtoresult_parameters2.json | 20 +++++++++ 6 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 95b870292d..bc72a2ae1b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -31,7 +31,7 @@ public class PrepareResultOrcidAssociationStep1 { .toString( PrepareResultOrcidAssociationStep1.class .getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConf); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index c60012a748..46894d0e1b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -29,7 +29,7 @@ public class PrepareResultOrcidAssociationStep2 { .toString( PrepareResultOrcidAssociationStep2.class .getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 5f9260e5dc..c5d6326581 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -36,7 +36,7 @@ public class SparkOrcidToResultFromSemRelJob { .toString( SparkOrcidToResultFromSemRelJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json new file mode 100644 index 0000000000..3cbaa23bb6 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -0,0 +1,44 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName":"pu", + "paramLongName":"possibleUpdatesPath", + "paramDescription": "the path the the association resultId orcid author list can be found", + "paramRequired": true + }, + { + "paramName":"test", + "paramLongName":"isTest", + "paramDescription": "true if it is executing a test", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json new file mode 100644 index 0000000000..08648d61a1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"as", + "paramLongName":"allowedsemrels", + "paramDescription": "the allowed sematinc relations for propagation", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false 
otherwise", + "paramRequired": false + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json new file mode 100644 index 0000000000..1a67134a6b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] \ No newline at end of file From f2352e8a78017f26f297833546e1a0853c5a89b7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:43:34 +0100 Subject: [PATCH 16/56] changed in the classes the path for the property files for the propagation of community from project --- .../resulttocommunityfromproject/PrepareResultCommunitySet.java | 2 +- .../SparkResultToCommunityFromProject.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index 467e11a969..512dfa9bed 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -38,7 +38,7 @@ public class PrepareResultCommunitySet { .toString( PrepareResultCommunitySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 229ac7e32e..dde5340617 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -44,7 +44,7 @@ public class SparkResultToCommunityFromProject implements Serializable { .toString( SparkResultToCommunityFromProject.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json")); + 
"/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); From 2f7b9ad815358857dd14656ae1e4b160e7721662 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 11:46:15 +0100 Subject: [PATCH 17/56] added properties file in the forlder for the workflow of project to result propagation. Changes the path in the classes implementing the propagation --- .../PrepareProjectResultsAssociation.java | 2 +- .../SparkResultToProjectThroughSemRelJob.java | 2 +- ...put_prepareprojecttoresult_parameters.json | 33 ++++++++++++++ .../input_projecttoresult_parameters.json | 44 +++++++++++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index ac61e26f94..8f4e2ad9a5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -28,7 +28,7 @@ public class PrepareProjectResultsAssociation { .toString( PrepareProjectResultsAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1ec521af18..e7518673d8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -33,7 +33,7 @@ public class SparkResultToProjectThroughSemRelJob { .toString( SparkResultToProjectThroughSemRelJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json new file mode 100644 index 0000000000..a70dbd6a08 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json @@ -0,0 +1,33 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + 
"paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName":"asr", + "paramLongName":"allowedsemrels", + "paramDescription": "the types of the allowed datasources. Split by ;", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"pu", + "paramLongName":"potentialUpdatePath", + "paramDescription": "the path of the potential updates ", + "paramRequired": true + }, + { + "paramName":"al", + "paramLongName":"alreadyLinkedPath", + "paramDescription": "the path of the already linked project result_set", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json new file mode 100644 index 0000000000..7f44ba03cd --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json @@ -0,0 +1,44 @@ +[ + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"pu", + "paramLongName":"potentialUpdatePath", + "paramDescription": "the path of the potential updates ", + "paramRequired": true + }, + { + "paramName":"al", + "paramLongName":"alreadyLinkedPath", + "paramDescription": "the path of the already linked project result_set", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "test", + "paramLongName": "isTest", + "paramDescription": "true if it is a test running", + "paramRequired": false + } +] \ No newline at end of file From 2f3b5a133d4ddfc4ed6a38366c927330d2c25b08 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 13:56:40 +0100 Subject: [PATCH 18/56] added properties file in the forlder for the workflow of result to community from organization propagation. 
Changes the path in the classes implementing the propagation --- .../PrepareResultCommunitySet.java | 2 +- ...kResultToCommunityFromOrganizationJob.java | 2 +- .../input_communitytoresult_parameters.json | 28 ++++++++++++++++ ...t_preparecommunitytoresult_parameters.json | 33 +++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 54fa601681..be31cd46cc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -34,7 +34,7 @@ public class PrepareResultCommunitySet { .toString( PrepareResultCommunitySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index adb7feef7e..cc87b80e5e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -36,7 +36,7 @@ public class SparkResultToCommunityFromOrganizationJob { .toString( SparkResultToCommunityFromOrganizationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json new file mode 100644 index 0000000000..0db8085d19 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json @@ -0,0 +1,28 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "out", + 
"paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json new file mode 100644 index 0000000000..3601db7acc --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json @@ -0,0 +1,33 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "bu", + "paramLongName": "baseURL", + "paramDescription": "the base URL to the community API to use", + "paramRequired": false + } + +] \ No newline at end of file From 9f966b59d446ba83d9dd002dddaf1d9585a3b037 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 14:11:47 +0100 Subject: [PATCH 19/56] added properties file in the forlder for the workflow of result to community from semrel propagation. 
Changes the path in the classes implementing the propagation --- .../PrepareResultCommunitySetStep1.java | 2 +- .../PrepareResultCommunitySetStep2.java | 2 +- ...parkResultToCommunityThroughSemRelJob.java | 2 +- .../input_communitytoresult_parameters.json | 52 +++++++++++++++++++ ..._preparecommunitytoresult2_parameters.json | 20 +++++++ ...t_preparecommunitytoresult_parameters.json | 44 ++++++++++++++++ 6 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 40c074a6e5..aede9ef05b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -61,7 +61,7 @@ public class PrepareResultCommunitySetStep1 { .toString( PrepareResultCommunitySetStep1.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 0ddb19a1ac..a53d3dfe32 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -31,7 +31,7 @@ public class PrepareResultCommunitySetStep2 { .toString( PrepareResultCommunitySetStep2.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index a107378499..4929c7582d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -33,7 +33,7 @@ 
public class SparkResultToCommunityThroughSemRelJob { .toString( SparkResultToCommunityThroughSemRelJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json new file mode 100644 index 0000000000..a40ce375ee --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json @@ -0,0 +1,52 @@ +[ + + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "preparedInfoPath", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": true + }, + { + "paramName":"test", + "paramLongName":"isTest", + "paramDescription": "true if it is executing a test", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json new file mode 100644 index 0000000000..3ba3c8e9c7 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json new file mode 100644 index 0000000000..c6389ec8da --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json @@ -0,0 +1,44 @@ +[ + { + "paramName":"bu", + "paramLongName":"baseURL", + "paramDescription": "URL of the isLookUp Service", + "paramRequired": true + }, + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"as", + "paramLongName":"allowedsemrels", + "paramDescription": "the allowed semantic relations for propagation", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] \ No newline at end of file From cb14470ba6779bd6f5dea3e1b937512295c0854a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 Dec 2023 14:50:05 +0100 Subject: [PATCH 20/56] added properties file in the folder for the workflow of result to organization from inst repo propagation. 
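For context, this propagation adds affiliation relations between results and the organizations behind the institutional repositories they were collected from: the prepare step derives a datasource-to-organization association (honouring a blacklist), and the propagation step emits only relations not already present in the graph. A simplified illustration of that final filtering; relation typing and the oaf model are omitted, and all names are stand-ins:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class AffiliationFromInstRepoSketch {

	// emits (resultId, organizationId) pairs for results collected from an
	// institutional repository; the mutable alreadyLinked set is consulted and
	// updated so that no duplicate relation is produced
	public static List<String[]> newAffiliations(
		Map<String, String> datasourceToOrganization,
		Map<String, Set<String>> resultCollectedFrom,
		Set<String> alreadyLinked) {
		List<String[]> relations = new ArrayList<>();
		resultCollectedFrom.forEach((resultId, datasources) -> {
			for (String ds : datasources) {
				String org = datasourceToOrganization.get(ds);
				if (org != null && alreadyLinked.add(resultId + "|" + org))
					relations.add(new String[] { resultId, org });
			}
		});
		return relations;
	}
}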
Changes the path in the classes implementing the propagation --- .../AppendNewRelations.java | 2 +- .../PrepareResultInstRepoAssociation.java | 2 +- ...arkResultToOrganizationFromIstRepoJob.java | 2 +- .../input_newrelation_parameters.json | 20 +++++++ .../input_prepareresultorg_parameters.json | 32 +++++++++++ ...sulaffiliationfrominstrepo_parameters.json | 56 +++++++++++++++++++ 6 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java index 636c14b655..11e9421426 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java @@ -30,7 +30,7 @@ public class AppendNewRelations implements Serializable { .toString( AppendNewRelations.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index deec6fedc6..57488bd209 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -40,7 +40,7 @@ public class PrepareResultInstRepoAssociation { .toString( PrepareResultInstRepoAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index bbad20e2d5..c8862b10c4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -47,7 +47,7 @@ public 
class SparkResultToOrganizationFromIstRepoJob { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json new file mode 100644 index 0000000000..5fe92cff13 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + },{ + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "institutional repositories that should not be considered for the propagation", + "paramRequired": false +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json new file mode 100644 index 0000000000..3f4b1d151b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json @@ -0,0 +1,32 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + },{ + "paramName": "bl", + "paramLongName": "blacklist", + "paramDescription": "institutional repositories that should not be considered for the propagation", + "paramRequired": false +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json new file mode 100644 index 0000000000..d2b076c827 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json @@ -0,0 +1,56 @@ +[ + { + 
"paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hive_metastore_uris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName":"sg", + "paramLongName":"saveGraph", + "paramDescription": "true if the new version of the graph must be saved", + "paramRequired": false + }, + { + "paramName":"dop", + "paramLongName":"datasourceOrganizationPath", + "paramDescription": "path where to store/find association from datasource and organization", + "paramRequired": true + }, + { + "paramName":"alp", + "paramLongName":"alreadyLinkedPath", + "paramDescription": "path where to store/find already linked results and organizations", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + }, + { + "paramName": "test", + "paramLongName": "isTest", + "paramDescription": "true if it is a test running", + "paramRequired": false + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + } +] \ No newline at end of file From 02636e802c26c284efa1415d168815c5b23ed655 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Mon, 2 Oct 2023 09:25:12 +0200 Subject: [PATCH 21/56] SparkCreateSimRels: - Create dedup blocks from the complete queue of records matching cluster key instead of truncating the results - Clean titles once before clustering and similarity comparisons - Added support for filtered fields in model - Added support for sorting List fields in model - Added new JSONListClustering and numAuthorsTitleSuffixPrefixChain clustering functions - Added new maxLengthMatch comparator function - Use reduced complexity Levenshtein with threshold in levensteinTitle - Use reduced complexity AuthorsMatch with threshold early-quit - Use incremental Connected Component to decrease comparisons in similarity match in BlockProcessor - Use new clusterings configuration in Dedup tests SparkWhitelistSimRels: use left semi join for clarity and performance SparkCreateMergeRels: - Use new connected component algorithm that converge faster than Spark GraphX provided algorithm - Refactored to use Windowing sorting rather than groupBy to reduce memory pressure - Use historical pivot table to generate singleton rels, merged rels and keep continuity with dedupIds used in the past - Comparator for pivot record selection now uses "tomorrow" as filler for missing or incorrect date instead of "2000-01-01" - Changed generation of ids of type dedup_wf_001 to avoid collisions DedupRecordFactory: use reduceGroups instead of mapGroups to decrease memory pressure --- .../AbstractClusteringFunction.java | 23 +- .../eu/dnetlib/pace/clustering/Acronyms.java | 2 +- .../pace/clustering/ClusteringFunction.java | 2 +- .../pace/clustering/ImmutableFieldValue.java | 2 +- .../pace/clustering/JSONListClustering.java | 69 ++++ .../pace/clustering/KeywordsClustering.java | 12 +- .../pace/clustering/LastNameFirstInitial.java | 7 +- .../pace/clustering/LowercaseClustering.java | 2 +- .../dnetlib/pace/clustering/NgramPairs.java | 4 +- .../eu/dnetlib/pace/clustering/Ngrams.java | 4 
+- .../NumAuthorsTitleSuffixPrefixChain.java | 113 ++++++ .../pace/clustering/PersonClustering.java | 6 +- .../dnetlib/pace/clustering/PersonHash.java | 2 +- .../clustering/RandomClusteringFunction.java | 2 +- .../pace/clustering/SortedNgramPairs.java | 7 +- .../clustering/SpaceTrimmingFieldValue.java | 4 +- .../dnetlib/pace/clustering/SuffixPrefix.java | 2 +- .../pace/clustering/UrlClustering.java | 14 +- .../WordsStatsSuffixPrefixChain.java | 2 +- .../pace/clustering/WordsSuffixPrefix.java | 2 +- .../pace/common/AbstractPaceFunctions.java | 66 ++-- .../eu/dnetlib/pace/model/ClusteringDef.java | 6 +- .../java/eu/dnetlib/pace/model/FieldDef.java | 35 ++ .../eu/dnetlib/pace/model/SparkDeduper.scala | 38 +- .../eu/dnetlib/pace/model/SparkModel.scala | 46 ++- .../eu/dnetlib/pace/tree/AuthorsMatch.java | 25 +- .../dnetlib/pace/tree/InstanceTypeMatch.java | 2 +- .../eu/dnetlib/pace/tree/LevensteinTitle.java | 20 +- .../eu/dnetlib/pace/tree/MaxLengthMatch.java | 29 ++ .../pace/tree/support/AbstractComparator.java | 10 + .../eu/dnetlib/pace/util/BlockProcessor.java | 24 +- .../util/IncrementalConnectedComponents.java | 50 +++ .../eu/dnetlib/pace/util/MapDocumentUtil.java | 2 + .../eu/dnetlib/pace/util/PaceResolver.java | 2 +- .../clustering/ClusteringFunctionTest.java | 40 +-- .../IncrementalConnectedComponentsTest.java | 40 +++ .../dhp/oa/dedup/AbstractSparkAction.java | 4 + .../dhp/oa/dedup/DedupRecordFactory.java | 134 +++---- .../eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 21 +- .../dhp/oa/dedup/SparkCreateMergeRels.java | 332 ++++++++++------- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 16 +- .../oa/dedup/graph/ConnectedComponent.java | 100 ------ .../dhp/oa/dedup/graph/GraphProcessor.scala | 37 -- .../dhp/oa/dedup/model/Identifier.java | 18 +- .../dhp/oa/dedup/createCC_parameters.json | 12 + .../dedup/scan/oozie_app/config-default.xml | 4 + .../dhp/oa/dedup/scan/oozie_app/workflow.xml | 2 + .../kwartile/lib/cc/ConnectedComponent.scala | 335 ++++++++++++++++++ .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 160 +++++++-- .../dnetlib/dhp/dedup/conf/ds.curr.conf.json | 3 +- .../dnetlib/dhp/dedup/conf/orp.curr.conf.json | 3 +- .../dnetlib/dhp/dedup/conf/pub.curr.conf.json | 49 ++- .../dnetlib/dhp/dedup/conf/sw.curr.conf.json | 3 +- .../dedup/pivot_history/pivot_history.json | 1 + pom.xml | 20 ++ 55 files changed, 1437 insertions(+), 533 deletions(-) create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java create mode 100644 dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java delete mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java 
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
index 3da8eb4900..e971ec5bb0 100644
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
@@ -14,9 +14,9 @@ import eu.dnetlib.pace.config.Config;
 public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction {
 
-	protected Map<String, Integer> params;
+	protected Map<String, Object> params;
 
-	public AbstractClusteringFunction(final Map<String, Integer> params) {
+	public AbstractClusteringFunction(final Map<String, Object> params) {
 		this.params = params;
 	}
 
@@ -27,7 +27,7 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i
 		return fields
 			.stream()
 			.filter(f -> !f.isEmpty())
-			.map(this::normalize)
+			.map(s -> normalize(s))
 			.map(s -> filterAllStopWords(s))
 			.map(s -> doApply(conf, s))
 			.map(c -> filterBlacklisted(c, ngramBlacklist))
@@ -36,11 +36,24 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i
 			.collect(Collectors.toCollection(HashSet::new));
 	}
 
-	public Map<String, Integer> getParams() {
+	public Map<String, Object> getParams() {
 		return params;
 	}
 
 	protected Integer param(String name) {
-		return params.get(name);
+		Object val = params.get(name);
+		if (val == null)
+			return null;
+		if (val instanceof Number) {
+			return ((Number) val).intValue();
+		}
+		return Integer.parseInt(val.toString());
+	}
+
+	protected int paramOrDefault(String name, int i) {
+		Integer res = param(name);
+		if (res == null)
+			res = i;
+		return res;
 	}
 }
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
index 9072fbb4b2..b5db27106d 100644
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
@@ -13,7 +13,7 @@ import eu.dnetlib.pace.config.Config;
 @ClusteringClass("acronyms")
 public class Acronyms extends AbstractClusteringFunction {
 
-	public Acronyms(Map<String, Integer> params) {
+	public Acronyms(Map<String, Object> params) {
 		super(params);
 	}
 
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
index 8b78524182..269de867d4 100644
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
@@ -11,6 +11,6 @@ public interface ClusteringFunction {
 
 	public Collection<String> apply(Config config, List<String> fields);
 
-	public Map<String, Integer> getParams();
+	public Map<String, Object> getParams();
 
 }
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
index bc8844aee0..cbfcde266c 100644
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
@@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config;
 @ClusteringClass("immutablefieldvalue")
 public class ImmutableFieldValue extends AbstractClusteringFunction {
 
-	public ImmutableFieldValue(final Map<String, Integer> params) {
+	public ImmutableFieldValue(final Map<String, Object> params) {
 		super(params);
 	}
 
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java
new file mode 100644
index
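Since the clustering parameters above moved from Map<String, Integer> to Map<String, Object>, configurations may now carry numeric and string values interchangeably; a small sketch of what the new param()/paramOrDefault() helpers accept (behaviour inferred from the code above):

import java.util.HashMap;
import java.util.Map;

class ParamCoercionDemo {
    public static void main(String[] args) {
        Map<String, Object> params = new HashMap<>();
        params.put("ngramLen", "3"); // string form, parsed via Integer.parseInt
        params.put("max", 8);        // numeric form, narrowed via Number.intValue()
        // inside any AbstractClusteringFunction subclass:
        // param("ngramLen") -> 3, param("max") -> 8, param("absent") -> null,
        // paramOrDefault("absent", 4) -> 4
    }
}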
0000000000..e00092bd0c
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java
@@ -0,0 +1,69 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.jayway.jsonpath.Configuration;
+import com.jayway.jsonpath.DocumentContext;
+import com.jayway.jsonpath.JsonPath;
+import com.jayway.jsonpath.Option;
+
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.util.MapDocumentUtil;
+
+@ClusteringClass("jsonlistclustering")
+public class JSONListClustering extends AbstractPaceFunctions implements ClusteringFunction {
+
+	private Map<String, Object> params;
+
+	public JSONListClustering(Map<String, Object> params) {
+		this.params = params;
+	}
+
+	@Override
+	public Map<String, Object> getParams() {
+		return params;
+	}
+
+	@Override
+	public Collection<String> apply(Config conf, List<String> fields) {
+		return fields
+			.stream()
+			.filter(f -> !f.isEmpty())
+			.map(s -> doApply(conf, s))
+			.filter(StringUtils::isNotBlank)
+			.collect(Collectors.toCollection(HashSet::new));
+	}
+
+	private String doApply(Config conf, String json) {
+		StringBuilder st = new StringBuilder(); // builds the string used for comparisons, based on the jpaths in the
+		// parameters
+		final DocumentContext documentContext = JsonPath
+			.using(Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS))
+			.parse(json);
+
+		// for each path in the param list
+		for (String key : params.keySet().stream().filter(k -> k.contains("jpath")).collect(Collectors.toList())) {
+			String path = params.get(key).toString();
+			String value = MapDocumentUtil.getJPathString(path, documentContext);
+			if (value == null || value.isEmpty())
+				value = "";
+			st.append(value);
+			st.append(" ");
+		}
+
+		st.setLength(st.length() - 1);
+
+		if (StringUtils.isBlank(st)) {
+			return "1";
+		}
+		return st.toString();
+	}
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
index 38299adb43..fdd8d1fb12 100644
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
@@ -11,7 +11,7 @@ import eu.dnetlib.pace.config.Config;
 @ClusteringClass("keywordsclustering")
 public class KeywordsClustering extends AbstractClusteringFunction {
 
-	public KeywordsClustering(Map<String, Integer> params) {
+	public KeywordsClustering(Map<String, Object> params) {
 		super(params);
 	}
 
@@ -19,8 +19,8 @@ public class KeywordsClustering extends AbstractClusteringFunction {
 	protected Collection<String> doApply(final Config conf, String s) {
 		// takes city codes and keyword codes without duplicates
-		Set<String> keywords = getKeywords(s, conf.translationMap(), params.getOrDefault("windowSize", 4));
-		Set<String> cities = getCities(s, params.getOrDefault("windowSize", 4));
+		Set<String> keywords = getKeywords(s, conf.translationMap(), paramOrDefault("windowSize", 4));
+		Set<String> cities = getCities(s, paramOrDefault("windowSize", 4));
 
 		// list of combinations to return as result
 		final Collection<String> combinations = new LinkedHashSet<>();
 
 		for (String keyword : keywordsToCodes(keywords, conf.translationMap())) {
 			for (String city : citiesToCodes(cities)) {
 				combinations.add(keyword + "-" + city);
-				if
(combinations.size() >= params.getOrDefault("max", 2)) { + if (combinations.size() >= paramOrDefault("max", 2)) { return combinations; } } @@ -42,8 +42,8 @@ public class KeywordsClustering extends AbstractClusteringFunction { return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::cleanup) - .map(this::normalize) + .map(KeywordsClustering::cleanup) + .map(KeywordsClustering::normalize) .map(s -> filterAllStopWords(s)) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java index 5a385961a6..9692f57624 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java @@ -16,7 +16,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { private boolean DEFAULT_AGGRESSIVE = true; - public LastNameFirstInitial(final Map params) { + public LastNameFirstInitial(final Map params) { super(params); } @@ -25,7 +25,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::normalize) + .map(LastNameFirstInitial::normalize) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) .flatMap(c -> c.stream()) @@ -33,8 +33,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { .collect(Collectors.toCollection(HashSet::new)); } - @Override - protected String normalize(final String s) { + public static String normalize(final String s) { return fixAliases(transliterate(nfd(unicodeNormalization(s)))) // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input // strings diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java index a3a6c48819..807f41dd59 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("lowercase") public class LowercaseClustering extends AbstractClusteringFunction { - public LowercaseClustering(final Map params) { + public LowercaseClustering(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java index aa06aa408e..bcc9667a8b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java @@ -12,11 +12,11 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("ngrampairs") public class NgramPairs extends Ngrams { - public NgramPairs(Map params) { + public NgramPairs(Map params) { super(params, false); } - public NgramPairs(Map params, boolean sorted) { + public NgramPairs(Map params, boolean sorted) { super(params, sorted); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java index 96c305a16a..7b862c729b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java +++ 
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java @@ -10,11 +10,11 @@ public class Ngrams extends AbstractClusteringFunction { private final boolean sorted; - public Ngrams(Map params) { + public Ngrams(Map params) { this(params, false); } - public Ngrams(Map params, boolean sorted) { + public Ngrams(Map params, boolean sorted) { super(params); this.sorted = sorted; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java new file mode 100644 index 0000000000..f1d1e17b90 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java @@ -0,0 +1,113 @@ + +package eu.dnetlib.pace.clustering; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import com.google.common.base.Splitter; +import com.google.common.collect.Sets; + +import eu.dnetlib.pace.config.Config; + +@ClusteringClass("numAuthorsTitleSuffixPrefixChain") +public class NumAuthorsTitleSuffixPrefixChain extends AbstractClusteringFunction { + + public NumAuthorsTitleSuffixPrefixChain(Map params) { + super(params); + } + + @Override + public Collection apply(Config conf, List fields) { + + try { + int num_authors = Math.min(Integer.parseInt(fields.get(0)), 21); // SIZE threshold is 20, +1 + + if (num_authors > 0) { + return super.apply(conf, fields.subList(1, fields.size())) + .stream() + .map(s -> num_authors + "-" + s) + .collect(Collectors.toList()); + } + } catch (NumberFormatException e) { + // missing or null authors array + } + + return Collections.emptyList(); + } + + @Override + protected Collection doApply(Config conf, String s) { + return suffixPrefixChain(cleanup(s), param("mod")); + } + + private Collection suffixPrefixChain(String s, int mod) { + // create the list of words from the string (remove short words) + List wordsList = Arrays + .stream(s.split(" ")) + .filter(si -> si.length() > 3) + .collect(Collectors.toList()); + + final int words = wordsList.size(); + final int letters = s.length(); + + // create the prefix: number of words + number of letters/mod + String prefix = words / mod + "-"; + + return doSuffixPrefixChain(wordsList, prefix); + + } + + private Collection doSuffixPrefixChain(List wordsList, String prefix) { + + Set set = Sets.newLinkedHashSet(); + switch (wordsList.size()) { + case 0: + break; + case 1: + set.add(wordsList.get(0)); + break; + case 2: + set + .add( + prefix + + suffix(wordsList.get(0), 3) + + prefix(wordsList.get(1), 3)); + + set + .add( + prefix + + prefix(wordsList.get(0), 3) + + suffix(wordsList.get(1), 3)); + + break; + default: + set + .add( + prefix + + suffix(wordsList.get(0), 3) + + prefix(wordsList.get(1), 3) + + suffix(wordsList.get(2), 3)); + + set + .add( + prefix + + prefix(wordsList.get(0), 3) + + suffix(wordsList.get(1), 3) + + prefix(wordsList.get(2), 3)); + break; + } + + return set; + + } + + private String suffix(String s, int len) { + return s.substring(s.length() - len); + } + + private String prefix(String s, int len) { + return s.substring(0, len); + } + +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java index b4a04ce65f..91b51bebbd 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java +++ 
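Hedged usage sketches for the two new clustering functions above. Neither implementation reads the Config argument, so null is passed purely for illustration; the jpath parameter names are invented for the example, and the keys shown in comments are indicative rather than guaranteed (HashMap iteration order affects the concatenation in jsonlistclustering):

import java.util.*;

import eu.dnetlib.pace.clustering.JSONListClustering;
import eu.dnetlib.pace.clustering.NumAuthorsTitleSuffixPrefixChain;

class NewClusteringFunctionsDemo {
    public static void main(String[] args) {
        // every parameter whose key contains "jpath" contributes one extracted value
        Map<String, Object> jsonParams = new HashMap<>();
        jsonParams.put("jpath_classid", "$.qualifier.classid");
        jsonParams.put("jpath_value", "$.value");
        Collection<String> jsonKeys = new JSONListClustering(jsonParams)
            .apply(null, Arrays.asList("{\"qualifier\":{\"classid\":\"doi\"},\"value\":\"10.1234/abc\"}"));
        // -> one key concatenating the extracted values, e.g. "doi 10.1234/abc";
        // records where every jpath comes up blank share the constant key "1"

        // first field carries the author count, the remaining fields the title
        Map<String, Object> titleParams = new HashMap<>();
        titleParams.put("mod", "10");
        Collection<String> titleKeys = new NumAuthorsTitleSuffixPrefixChain(titleParams)
            .apply(null, Arrays.asList("2", "Framework for general purpose deduplication"));
        // -> keys like "2-0-orkgenose": the (capped) author count is prefixed, so
        // records with different author counts never land in the same block
    }
}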
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java @@ -17,11 +17,11 @@ import eu.dnetlib.pace.model.Person; @ClusteringClass("personClustering") public class PersonClustering extends AbstractPaceFunctions implements ClusteringFunction { - private Map params; + private Map params; private static final int MAX_TOKENS = 5; - public PersonClustering(final Map params) { + public PersonClustering(final Map params) { this.params = params; } @@ -77,7 +77,7 @@ public class PersonClustering extends AbstractPaceFunctions implements Clusterin // } @Override - public Map getParams() { + public Map getParams() { return params; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java index a3d58a9be3..09a112c37f 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java @@ -15,7 +15,7 @@ public class PersonHash extends AbstractClusteringFunction { private boolean DEFAULT_AGGRESSIVE = false; - public PersonHash(final Map params) { + public PersonHash(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java index 2aab926da4..3733dfc742 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java @@ -8,7 +8,7 @@ import eu.dnetlib.pace.config.Config; public class RandomClusteringFunction extends AbstractClusteringFunction { - public RandomClusteringFunction(Map params) { + public RandomClusteringFunction(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java index b085ae26d0..ca1b4189b3 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java @@ -1,7 +1,10 @@ package eu.dnetlib.pace.clustering; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; import com.google.common.base.Joiner; import com.google.common.base.Splitter; @@ -12,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("sortedngrampairs") public class SortedNgramPairs extends NgramPairs { - public SortedNgramPairs(Map params) { + public SortedNgramPairs(Map params) { super(params, false); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java index 392aecc794..048380f7ed 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("spacetrimmingfieldvalue") public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { - public SpaceTrimmingFieldValue(final Map params) { + public SpaceTrimmingFieldValue(final Map params) { super(params); } @@ -25,7 +25,7 @@ public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { res .add( - 
StringUtils.isBlank(s) ? RandomStringUtils.random(getParams().get("randomLength")) + StringUtils.isBlank(s) ? RandomStringUtils.random(param("randomLength")) : s.toLowerCase().replaceAll("\\s+", "")); return res; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java index 2a1c023a96..b6921e9f1a 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("suffixprefix") public class SuffixPrefix extends AbstractClusteringFunction { - public SuffixPrefix(Map params) { + public SuffixPrefix(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java index 5b267ad106..34f41085b4 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java @@ -15,12 +15,17 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("urlclustering") public class UrlClustering extends AbstractPaceFunctions implements ClusteringFunction { - protected Map params; + protected Map params; - public UrlClustering(final Map params) { + public UrlClustering(final Map params) { this.params = params; } + @Override + public Map getParams() { + return params; + } + @Override public Collection apply(final Config conf, List fields) { try { @@ -35,11 +40,6 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu } } - @Override - public Map getParams() { - return null; - } - private URL asUrl(String value) { try { return new URL(value); diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java index c8e02f8f03..22351cf8ff 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java @@ -11,7 +11,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("wordsStatsSuffixPrefixChain") public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction { - public WordsStatsSuffixPrefixChain(Map params) { + public WordsStatsSuffixPrefixChain(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java index e606590a53..f9fef376bf 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("wordssuffixprefix") public class WordsSuffixPrefix extends AbstractClusteringFunction { - public WordsSuffixPrefix(Map params) { + public WordsSuffixPrefix(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index b440686ded..ba7639adad 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ 
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -16,7 +16,6 @@ import org.apache.commons.lang3.StringUtils; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.ibm.icu.text.Transliterator; @@ -27,7 +26,7 @@ import eu.dnetlib.pace.clustering.NGramUtils; * * @author claudio */ -public abstract class AbstractPaceFunctions { +public class AbstractPaceFunctions { // city map to be used when translating the city names into codes private static Map cityMap = AbstractPaceFunctions @@ -62,11 +61,14 @@ public abstract class AbstractPaceFunctions { private static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})"); - protected String concat(final List l) { + private static Pattern romanNumberPattern = Pattern + .compile("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$"); + + protected static String concat(final List l) { return Joiner.on(" ").skipNulls().join(l); } - protected String cleanup(final String s) { + public static String cleanup(final String s) { final String s1 = HTML_REGEX.matcher(s).replaceAll(""); final String s2 = unicodeNormalization(s1.toLowerCase()); final String s3 = nfd(s2); @@ -82,7 +84,7 @@ public abstract class AbstractPaceFunctions { return s12; } - protected String fixXML(final String a) { + protected static String fixXML(final String a) { return a .replaceAll("–", " ") @@ -91,7 +93,7 @@ public abstract class AbstractPaceFunctions { .replaceAll("−", " "); } - protected boolean checkNumbers(final String a, final String b) { + protected static boolean checkNumbers(final String a, final String b) { final String numbersA = getNumbers(a); final String numbersB = getNumbers(b); final String romansA = getRomans(a); @@ -99,7 +101,7 @@ public abstract class AbstractPaceFunctions { return !numbersA.equals(numbersB) || !romansA.equals(romansB); } - protected String getRomans(final String s) { + protected static String getRomans(final String s) { final StringBuilder sb = new StringBuilder(); for (final String t : s.split(" ")) { sb.append(isRoman(t) ? t : ""); @@ -107,13 +109,12 @@ public abstract class AbstractPaceFunctions { return sb.toString(); } - protected boolean isRoman(final String s) { - return s - .replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop") - .equals("qwertyuiop"); + protected static boolean isRoman(final String s) { + Matcher m = romanNumberPattern.matcher(s); + return m.matches() && m.hitEnd(); } - protected String getNumbers(final String s) { + protected static String getNumbers(final String s) { final StringBuilder sb = new StringBuilder(); for (final String t : s.split(" ")) { sb.append(isNumber(t) ? 
t : ""); @@ -121,7 +122,7 @@ public abstract class AbstractPaceFunctions { return sb.toString(); } - public boolean isNumber(String strNum) { + public static boolean isNumber(String strNum) { if (strNum == null) { return false; } @@ -147,7 +148,7 @@ public abstract class AbstractPaceFunctions { } } - protected String removeSymbols(final String s) { + protected static String removeSymbols(final String s) { final StringBuilder sb = new StringBuilder(); s.chars().forEach(ch -> { @@ -157,11 +158,11 @@ public abstract class AbstractPaceFunctions { return sb.toString().replaceAll("\\s+", " "); } - protected boolean notNull(final String s) { + protected static boolean notNull(final String s) { return s != null; } - protected String normalize(final String s) { + public static String normalize(final String s) { return fixAliases(transliterate(nfd(unicodeNormalization(s)))) .toLowerCase() // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input @@ -174,16 +175,16 @@ public abstract class AbstractPaceFunctions { .trim(); } - public String nfd(final String s) { + public static String nfd(final String s) { return Normalizer.normalize(s, Normalizer.Form.NFD); } - public String utf8(final String s) { + public static String utf8(final String s) { byte[] bytes = s.getBytes(StandardCharsets.UTF_8); return new String(bytes, StandardCharsets.UTF_8); } - public String unicodeNormalization(final String s) { + public static String unicodeNormalization(final String s) { Matcher m = hexUnicodePattern.matcher(s); StringBuffer buf = new StringBuffer(s.length()); @@ -195,7 +196,7 @@ public abstract class AbstractPaceFunctions { return buf.toString(); } - protected String filterStopWords(final String s, final Set stopwords) { + protected static String filterStopWords(final String s, final Set stopwords) { final StringTokenizer st = new StringTokenizer(s); final StringBuilder sb = new StringBuilder(); while (st.hasMoreTokens()) { @@ -208,7 +209,7 @@ public abstract class AbstractPaceFunctions { return sb.toString().trim(); } - public String filterAllStopWords(String s) { + public static String filterAllStopWords(String s) { s = filterStopWords(s, stopwords_en); s = filterStopWords(s, stopwords_de); @@ -221,7 +222,8 @@ public abstract class AbstractPaceFunctions { return s; } - protected Collection filterBlacklisted(final Collection set, final Set ngramBlacklist) { + protected static Collection filterBlacklisted(final Collection set, + final Set ngramBlacklist) { final Set newset = Sets.newLinkedHashSet(); for (final String s : set) { if (!ngramBlacklist.contains(s)) { @@ -268,7 +270,7 @@ public abstract class AbstractPaceFunctions { return m; } - public String removeKeywords(String s, Set keywords) { + public static String removeKeywords(String s, Set keywords) { s = " " + s + " "; for (String k : keywords) { @@ -278,39 +280,39 @@ public abstract class AbstractPaceFunctions { return s.trim(); } - public double commonElementsPercentage(Set s1, Set s2) { + public static double commonElementsPercentage(Set s1, Set s2) { double longer = Math.max(s1.size(), s2.size()); return (double) s1.stream().filter(s2::contains).count() / longer; } // convert the set of keywords to codes - public Set toCodes(Set keywords, Map translationMap) { + public static Set toCodes(Set keywords, Map translationMap) { return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet()); } - public Set keywordsToCodes(Set keywords, Map translationMap) { + public static Set 
keywordsToCodes(Set keywords, Map translationMap) { return toCodes(keywords, translationMap); } - public Set citiesToCodes(Set keywords) { + public static Set citiesToCodes(Set keywords) { return toCodes(keywords, cityMap); } - protected String firstLC(final String s) { + protected static String firstLC(final String s) { return StringUtils.substring(s, 0, 1).toLowerCase(); } - protected Iterable tokens(final String s, final int maxTokens) { + protected static Iterable tokens(final String s, final int maxTokens) { return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens); } - public String normalizePid(String pid) { + public static String normalizePid(String pid) { return DOI_PREFIX.matcher(pid.toLowerCase()).replaceAll(""); } // get the list of keywords into the input string - public Set getKeywords(String s1, Map translationMap, int windowSize) { + public static Set getKeywords(String s1, Map translationMap, int windowSize) { String s = s1; @@ -340,7 +342,7 @@ public abstract class AbstractPaceFunctions { return codes; } - public Set getCities(String s1, int windowSize) { + public static Set getCities(String s1, int windowSize) { return getKeywords(s1, cityMap, windowSize); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java index d9ad81d42b..5ede2c3804 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java @@ -18,7 +18,7 @@ public class ClusteringDef implements Serializable { private List fields; - private Map params; + private Map params; public ClusteringDef() { } @@ -43,11 +43,11 @@ public class ClusteringDef implements Serializable { this.fields = fields; } - public Map getParams() { + public Map getParams() { return params; } - public void setParams(final Map params) { + public void setParams(final Map params) { this.params = params; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index f34545e6df..7ad9b74458 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -2,6 +2,7 @@ package eu.dnetlib.pace.model; import java.io.Serializable; +import java.util.HashSet; import java.util.List; import com.fasterxml.jackson.core.JsonProcessingException; @@ -36,6 +37,16 @@ public class FieldDef implements Serializable { */ private int length = -1; + private HashSet filter; + + private boolean sorted; + + public boolean isSorted() { + return sorted; + } + + private String clean; + public FieldDef() { } @@ -91,6 +102,30 @@ public class FieldDef implements Serializable { this.path = path; } + public HashSet getFilter() { + return filter; + } + + public void setFilter(HashSet filter) { + this.filter = filter; + } + + public boolean getSorted() { + return sorted; + } + + public void setSorted(boolean sorted) { + this.sorted = sorted; + } + + public String getClean() { + return clean; + } + + public void setClean(String clean) { + this.clean = clean; + } + @Override public String toString() { try { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala index b3f56bcdbe..bc702b9e2d 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala +++ 
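With AbstractPaceFunctions no longer abstract and its helpers exposed statically, title cleaning can happen once, up front, without subclassing. A minimal sketch of the new call style:

import eu.dnetlib.pace.common.AbstractPaceFunctions;

class StaticHelpersDemo {
    public static void main(String[] args) {
        // one-shot static calls instead of inherited instance methods
        String cleaned = AbstractPaceFunctions.cleanup("<p>Framework for Deduplication</p>");
        String normalized = AbstractPaceFunctions.normalize(cleaned);
        System.out.println(normalized);                           // e.g. "framework for deduplication"
        System.out.println(AbstractPaceFunctions.isNumber("42")); // true
    }
}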
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala @@ -5,9 +5,9 @@ import eu.dnetlib.pace.util.{BlockProcessor, SparkReporter} import org.apache.spark.SparkContext import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.expressions._ -import org.apache.spark.sql.functions.{col, lit, udf} +import org.apache.spark.sql.functions.{col, desc, expr, lit, udf} import org.apache.spark.sql.types._ -import org.apache.spark.sql.{Column, Dataset, Row, functions} +import org.apache.spark.sql.{Column, Dataset, Row, SaveMode, functions} import java.util.function.Predicate import java.util.stream.Collectors @@ -80,6 +80,8 @@ case class SparkDeduper(conf: DedupConfig) extends Serializable { .withColumn("key", functions.explode(clusterValuesUDF(cd).apply(functions.array(inputColumns: _*)))) // Add position column having the position of the row within the set of rows having the same key value ordered by the sorting value .withColumn("position", functions.row_number().over(Window.partitionBy("key").orderBy(col(model.orderingFieldName), col(model.identifierFieldName)))) + // .withColumn("count", functions.max("position").over(Window.partitionBy("key").orderBy(col(model.orderingFieldName), col(model.identifierFieldName)).rowsBetween(Window.unboundedPreceding,Window.unboundedFollowing) )) + // .filter("count > 1") if (df_with_clustering_keys == null) df_with_clustering_keys = ds @@ -88,20 +90,44 @@ case class SparkDeduper(conf: DedupConfig) extends Serializable { } //TODO: analytics + /*df_with_clustering_keys.groupBy(col("clustering"), col("key")) + .agg(expr("max(count) AS size")) + .orderBy(desc("size")) + .show*/ val df_with_blocks = df_with_clustering_keys - // filter out rows with position exceeding the maxqueuesize parameter - .filter(col("position").leq(conf.getWf.getQueueMaxSize)) - .groupBy("clustering", "key") + // split the clustering block into smaller blocks of queuemaxsize + .groupBy(col("clustering"), col("key"), functions.floor(col("position").divide(lit(conf.getWf.getQueueMaxSize)))) .agg(functions.collect_set(functions.struct(model.schema.fieldNames.map(col): _*)).as("block")) .filter(functions.size(new Column("block")).gt(1)) + .union( + //adjacency blocks + df_with_clustering_keys + // filter out leading and trailing elements + .filter(col("position").gt(conf.getWf.getSlidingWindowSize/2)) + //.filter(col("position").lt(col("count").minus(conf.getWf.getSlidingWindowSize/2))) + // create small blocks of records on "the border" of maxqueuesize: getSlidingWindowSize/2 elements before and after + .filter( + col("position").mod(conf.getWf.getQueueMaxSize).lt(conf.getWf.getSlidingWindowSize/2) // slice of the start of block + || col("position").mod(conf.getWf.getQueueMaxSize).gt(conf.getWf.getQueueMaxSize - (conf.getWf.getSlidingWindowSize/2)) //slice of the end of the block + ) + .groupBy(col("clustering"), col("key"), functions.floor((col("position") + lit(conf.getWf.getSlidingWindowSize/2)).divide(lit(conf.getWf.getQueueMaxSize)))) + .agg(functions.collect_set(functions.struct(model.schema.fieldNames.map(col): _*)).as("block")) + .filter(functions.size(new Column("block")).gt(1)) + ) df_with_blocks } def clusterValuesUDF(cd: ClusteringDef) = { udf[mutable.WrappedArray[String], mutable.WrappedArray[Any]](values => { - values.flatMap(f => cd.clusteringFunction().apply(conf, Seq(f.toString).asJava).asScala) + val valueList = values.flatMap { + case a: mutable.WrappedArray[Any] => a.map(_.toString) + case s: Any => Seq(s.toString) + }.asJava; + + 
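The Spark column expressions above are dense in diff form; restated as plain arithmetic, this is the slicing they implement (a sketch assuming 1-based positions from row_number, queueMaxSize = 200 and slidingWindowSize = 100):

class BlockSlicingDemo {
    static final int QUEUE_MAX = 200; // conf.getWf().getQueueMaxSize()
    static final int WINDOW = 100;    // conf.getWf().getSlidingWindowSize()

    // main blocks: the full per-key queue is cut into slices of QUEUE_MAX rows
    static long mainBlock(long position) {
        return position / QUEUE_MAX;
    }

    // adjacency blocks: rows within WINDOW/2 of a cut line form an extra small
    // block, so near-duplicates straddling two slices are still compared
    static boolean inAdjacencyBlock(long position) {
        long m = position % QUEUE_MAX;
        return position > WINDOW / 2
            && (m < WINDOW / 2 || m > QUEUE_MAX - WINDOW / 2);
    }

    static long adjacencyBlock(long position) {
        return (position + WINDOW / 2) / QUEUE_MAX;
    }
}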
mutable.WrappedArray.make(cd.clusteringFunction().apply(conf, valueList).toArray()) + }) } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala index aa997c6e9f..aa04188dae 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala @@ -1,13 +1,16 @@ package eu.dnetlib.pace.model import com.jayway.jsonpath.{Configuration, JsonPath} +import eu.dnetlib.pace.common.AbstractPaceFunctions import eu.dnetlib.pace.config.{DedupConfig, Type} import eu.dnetlib.pace.util.MapDocumentUtil +import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} import org.apache.spark.sql.{Dataset, Row} +import java.util.Locale import java.util.regex.Pattern import scala.collection.JavaConverters._ @@ -60,7 +63,7 @@ case class SparkModel(conf: DedupConfig) { values(identityFieldPosition) = MapDocumentUtil.getJPathString(conf.getWf.getIdPath, documentContext) schema.fieldNames.zipWithIndex.foldLeft(values) { - case ((res, (fname, index))) => { + case ((res, (fname, index))) => val fdef = conf.getPace.getModelMap.get(fname) if (fdef != null) { @@ -96,13 +99,52 @@ case class SparkModel(conf: DedupConfig) { case Type.DoubleArray => MapDocumentUtil.getJPathArray(fdef.getPath, json) } + + val filter = fdef.getFilter + + if (StringUtils.isNotBlank(fdef.getClean)) { + res(index) = res(index) match { + case x: Seq[String] => x.map(clean(_, fdef.getClean)).toSeq + case _ => clean(res(index).toString, fdef.getClean) + } + } + + if (filter != null && !filter.isEmpty) { + res(index) = res(index) match { + case x: String if filter.contains(x.toLowerCase(Locale.ROOT)) => null + case x: Seq[String] => x.filter(s => !filter.contains(s.toLowerCase(Locale.ROOT))).toSeq + case _ => res(index) + } + } + + if (fdef.getSorted) { + res(index) = res(index) match { + case x: Seq[String] => x.sorted.toSeq + case _ => res(index) + } + } } res - } } new GenericRowWithSchema(values, schema) } + + def clean(value: String, cleantype: String) : String = { + val res = cleantype match { + case "title" => AbstractPaceFunctions.cleanup(value) + case _ => value + } + +// if (!res.equals(AbstractPaceFunctions.normalize(value))) { +// println(res) +// println(AbstractPaceFunctions.normalize(value)) +// println() +// } + + res + } + } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java index 5c6939e601..edad0ae2e7 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java @@ -23,7 +23,6 @@ public class AuthorsMatch extends AbstractListComparator { private String MODE; // full or surname private int SIZE_THRESHOLD; private String TYPE; // count or percentage - private int common; public AuthorsMatch(Map params) { super(params, new com.wcohen.ss.JaroWinkler()); @@ -35,7 +34,6 @@ public class AuthorsMatch extends AbstractListComparator { FULLNAME_THRESHOLD = Double.parseDouble(params.getOrDefault("fullname_th", "0.9")); SIZE_THRESHOLD = Integer.parseInt(params.getOrDefault("size_th", "20")); TYPE = params.getOrDefault("type", "percentage"); - common = 0; } protected AuthorsMatch(double w, 
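The extraction hooks above are driven by the three new FieldDef properties introduced earlier in this patch; a hedged sketch of a field definition enabling all of them (setName and setPath are assumed from the pre-existing model, the rest are the setters added above):

import java.util.Arrays;
import java.util.HashSet;

import eu.dnetlib.pace.model.FieldDef;

class FieldDefDemo {
    static FieldDef titleField() {
        FieldDef title = new FieldDef();
        title.setName("title");            // assumed pre-existing setter
        title.setPath("$.title[*].value"); // illustrative JSON path
        title.setClean("title");           // run cleanup() once at extraction time
        title.setFilter(new HashSet<>(Arrays.asList("unknown"))); // drop junk values (case-insensitive)
        title.setSorted(true);             // sort multi-valued fields for order-insensitive comparison
        return title;
    }
}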
AbstractStringDistance ssalgo) { @@ -44,22 +42,27 @@ public class AuthorsMatch extends AbstractListComparator { @Override public double compare(final List a, final List b, final Config conf) { - if (a.isEmpty() || b.isEmpty()) return -1; if (a.size() > SIZE_THRESHOLD || b.size() > SIZE_THRESHOLD) return 1.0; - List aList = a.stream().map(author -> new Person(author, false)).collect(Collectors.toList()); + int maxMiss = Integer.MAX_VALUE; List bList = b.stream().map(author -> new Person(author, false)).collect(Collectors.toList()); - common = 0; + Double threshold = getDoubleParam("threshold"); + + if (threshold != null && threshold >= 0.0 && threshold <= 1.0 && a.size() == b.size()) { + maxMiss = (int) Math.floor((1 - threshold) * Math.max(a.size(), b.size())); + } + + int common = 0; // compare each element of List1 with each element of List2 - for (Person p1 : aList) + for (int i = 0; i < a.size(); i++) { + Person p1 = new Person(a.get(i), false); for (Person p2 : bList) { - // both persons are inaccurate if (!p1.isAccurate() && !p2.isAccurate()) { // compare just normalized fullnames @@ -118,11 +121,15 @@ public class AuthorsMatch extends AbstractListComparator { } } - } + if (i - common > maxMiss) { + return 0.0; + } + } + // normalization factor to compute the score - int normFactor = aList.size() == bList.size() ? aList.size() : (aList.size() + bList.size() - common); + int normFactor = a.size() == b.size() ? a.size() : (a.size() + b.size() - common); if (TYPE.equals("percentage")) { return (double) common / normFactor; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java index 238cb16cec..34ebcf7a7d 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java @@ -25,6 +25,7 @@ public class InstanceTypeMatch extends AbstractListComparator { translationMap.put("Conference object", "*"); translationMap.put("Other literature type", "*"); translationMap.put("Unknown", "*"); + translationMap.put("UNKNOWN", "*"); // article types translationMap.put("Article", "Article"); @@ -76,5 +77,4 @@ public class InstanceTypeMatch extends AbstractListComparator { protected double normalize(final double d) { return d; } - } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java index 877cb95abd..e2ee062b56 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java @@ -3,6 +3,7 @@ package eu.dnetlib.pace.tree; import java.util.Map; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,16 +31,25 @@ public class LevensteinTitle extends AbstractStringComparator { } @Override - public double distance(final String a, final String b, final Config conf) { - final String ca = cleanup(a); - final String cb = cleanup(b); - + public double distance(final String ca, final String cb, final Config conf) { final boolean check = checkNumbers(ca, cb); if (check) return 0.5; - return normalize(ssalgo.score(ca, cb), ca.length(), cb.length()); + Double threshold = getDoubleParam("threshold"); + + // reduce Levenshtein algo complexity when target threshold is known + if (threshold != null && threshold >= 0.0 && threshold <= 1.0) { + int 
maxdistance = (int) Math.floor((1 - threshold) * Math.max(ca.length(), cb.length())); + int score = StringUtils.getLevenshteinDistance(ca, cb, maxdistance); + if (score == -1) { + return 0; + } + return normalize(score, ca.length(), cb.length()); + } else { + return normalize(StringUtils.getLevenshteinDistance(ca, cb), ca.length(), cb.length()); + } } private double normalize(final double score, final int la, final int lb) { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java new file mode 100644 index 0000000000..8f525c6d50 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java @@ -0,0 +1,29 @@ + +package eu.dnetlib.pace.tree; + +import java.util.Map; + +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.tree.support.AbstractStringComparator; +import eu.dnetlib.pace.tree.support.ComparatorClass; + +@ComparatorClass("maxLengthMatch") +public class MaxLengthMatch extends AbstractStringComparator { + + private final int limit; + + public MaxLengthMatch(Map params) { + super(params); + + limit = Integer.parseInt(params.getOrDefault("limit", "200")); + } + + @Override + public double compare(String a, String b, final Config conf) { + return a.length() < limit && b.length() < limit ? 1.0 : -1.0; + } + + protected String toString(final Object object) { + return toFirstString(object); + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java index 8a957c5e32..cde73fd2b4 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java @@ -127,4 +127,14 @@ public abstract class AbstractComparator extends AbstractPaceFunctions implem return this.weight; } + public Double getDoubleParam(String name) { + String svalue = params.get(name); + + try { + return Double.parseDouble(svalue); + } catch (Throwable t) { + } + + return null; + } } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java index c2b0ddda7e..177ad73df7 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java @@ -67,8 +67,10 @@ public class BlockProcessor { private void processRows(final List queue, final Reporter context) { - for (int pivotPos = 0; pivotPos < queue.size(); pivotPos++) { - final Row pivot = queue.get(pivotPos); + IncrementalConnectedComponents icc = new IncrementalConnectedComponents(queue.size()); + + for (int i = 0; i < queue.size(); i++) { + final Row pivot = queue.get(i); final String idPivot = pivot.getString(identifierFieldPos); // identifier final Object fieldsPivot = getJavaValue(pivot, orderFieldPos); @@ -76,9 +78,9 @@ public class BlockProcessor { final WfConfig wf = dedupConf.getWf(); if (fieldPivot != null) { - int i = 0; - for (int windowPos = pivotPos + 1; windowPos < queue.size(); windowPos++) { - final Row curr = queue.get(windowPos); + for (int j = icc.nextUnconnected(i, i + 1); j >= 0 + && j < queue.size(); j = icc.nextUnconnected(i, j + 1)) { + final Row curr = queue.get(j); final String idCurr = curr.getString(identifierFieldPos); // identifier if (mustSkip(idCurr)) { @@ -86,7 +88,7 @@ public class BlockProcessor { break; } - 
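A worked example of the threshold trick used by LevensteinTitle above (AuthorsMatch applies the same idea to author lists): once the target similarity is known, the maximum tolerable edit distance can be precomputed, and the threshold variant of Commons Lang's getLevenshteinDistance returns -1 as soon as that bound is exceeded, avoiding the full computation. The threshold itself arrives through the new getDoubleParam helper shown above.

import org.apache.commons.lang3.StringUtils;

class BoundedLevenshteinDemo {
    public static void main(String[] args) {
        String a = "framework for general purpose deduplication";
        String b = "framework for general purpose de-duplication";
        double threshold = 0.95;
        // at 95% similarity, only floor(5% of the longer string) edits are tolerable
        int maxDistance = (int) Math.floor((1 - threshold) * Math.max(a.length(), b.length()));
        int d = StringUtils.getLevenshteinDistance(a, b, maxDistance); // -1 means "too far apart"
        if (d == -1) {
            System.out.println("below threshold, early exit");
        } else {
            System.out.println("distance " + d + ", similarity "
                + (1 - d / (double) Math.max(a.length(), b.length())));
        }
    }
}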
if (++i > wf.getSlidingWindowSize()) { + if (wf.getSlidingWindowSize() > 0 && (j - i) > wf.getSlidingWindowSize()) { break; } @@ -97,7 +99,9 @@ public class BlockProcessor { final TreeProcessor treeProcessor = new TreeProcessor(dedupConf); - emitOutput(treeProcessor.compare(pivot, curr), idPivot, idCurr, context); + if (emitOutput(treeProcessor.compare(pivot, curr), idPivot, idCurr, context)) { + icc.connect(i, j); + } } } } @@ -115,7 +119,8 @@ public class BlockProcessor { return null; } - private void emitOutput(final boolean result, final String idPivot, final String idCurr, final Reporter context) { + private boolean emitOutput(final boolean result, final String idPivot, final String idCurr, + final Reporter context) { if (result) { if (idPivot.compareTo(idCurr) <= 0) { @@ -127,6 +132,8 @@ public class BlockProcessor { } else { context.incrementCounter(dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold(), 1); } + + return result; } private boolean mustSkip(final String idPivot) { @@ -142,5 +149,4 @@ public class BlockProcessor { context.emit(type, from, to); } - } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java new file mode 100644 index 0000000000..ed35239a85 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.pace.util; + +import java.util.BitSet; + +public class IncrementalConnectedComponents { + final private int size; + + final private BitSet[] indexes; + + IncrementalConnectedComponents(int size) { + this.size = size; + this.indexes = new BitSet[size]; + } + + public void connect(int i, int j) { + if (indexes[i] == null) { + if (indexes[j] == null) { + indexes[i] = new BitSet(size); + } else { + indexes[i] = indexes[j]; + } + } else { + if (indexes[j] != null && indexes[i] != indexes[j]) { + // merge adjacency lists for i and j + indexes[i].or(indexes[j]); + } + } + + indexes[i].set(i); + indexes[i].set(j); + indexes[j] = indexes[i]; + } + + public int nextUnconnected(int i, int j) { + if (indexes[i] == null) { + return j; + } + int result = indexes[i].nextClearBit(j); + + return (result >= size) ? 
-1 : result; + } + + public BitSet getConnections(int i) { + if (indexes[i] == null) { + return null; + } + return indexes[i]; + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java index 28244cb3b6..7dc3406633 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java @@ -97,6 +97,8 @@ public class MapDocumentUtil { Object o = json.read(jsonPath); if (o instanceof String) return (String) o; + if (o instanceof Number) + return (String) o.toString(); if (o instanceof JSONArray && ((JSONArray) o).size() > 0) return (String) ((JSONArray) o).get(0); return ""; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java index 252205c79c..746892f0cf 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java @@ -40,7 +40,7 @@ public class PaceResolver implements Serializable { Collectors.toMap(cl -> cl.getAnnotation(ComparatorClass.class).value(), cl -> (Class) cl)); } - public ClusteringFunction getClusteringFunction(String name, Map params) throws PaceException { + public ClusteringFunction getClusteringFunction(String name, Map params) throws PaceException { try { return clusteringFunctions.get(name).getDeclaredConstructor(Map.class).newInstance(params); } catch (InstantiationException | IllegalAccessException | InvocationTargetException diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java index f9a1ea9e2a..80e349a3f7 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.DedupConfig; public class ClusteringFunctionTest extends AbstractPaceTest { - private static Map params; + private static Map params; private static DedupConfig conf; @BeforeAll @@ -40,10 +40,10 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testNgram() { - params.put("ngramLen", 3); - params.put("max", 8); - params.put("maxPerToken", 2); - params.put("minNgramLen", 1); + params.put("ngramLen", "3"); + params.put("max", "8"); + params.put("maxPerToken", "2"); + params.put("minNgramLen", "1"); final ClusteringFunction ngram = new Ngrams(params); @@ -54,8 +54,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testNgramPairs() { - params.put("ngramLen", 3); - params.put("max", 2); + params.put("ngramLen", "3"); + params.put("max", "2"); final ClusteringFunction np = new NgramPairs(params); @@ -66,8 +66,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testSortedNgramPairs() { - params.put("ngramLen", 3); - params.put("max", 2); + params.put("ngramLen", "3"); + params.put("max", "2"); final ClusteringFunction np = new SortedNgramPairs(params); @@ -87,9 +87,9 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testAcronym() { - params.put("max", 4); - params.put("minLen", 1); - params.put("maxLen", 3); + params.put("max", "4"); + params.put("minLen", "1"); + params.put("maxLen", "3"); final ClusteringFunction 
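What IncrementalConnectedComponents buys inside BlockProcessor, in one sketch: once two records are known to match, every pair that is already transitively connected is skipped instead of re-compared (same-package access assumed, as in the unit test below):

package eu.dnetlib.pace.util;

class TransitiveSkipDemo {
    public static void main(String[] args) {
        IncrementalConnectedComponents icc = new IncrementalConnectedComponents(4);
        icc.connect(0, 1); // pair (0,1) matched
        icc.connect(1, 2); // pair (1,2) matched: 0, 1 and 2 now share one BitSet
        // the scan for pivot 0 skips rows 1 and 2 and resumes at the first
        // unconnected row
        System.out.println(icc.nextUnconnected(0, 1)); // -> 3
    }
}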
acro = new Acronyms(params); @@ -100,8 +100,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testSuffixPrefix() { - params.put("len", 3); - params.put("max", 4); + params.put("len", "3"); + params.put("max", "4"); final ClusteringFunction sp = new SuffixPrefix(params); @@ -109,8 +109,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { System.out.println(s); System.out.println(sp.apply(conf, Lists.newArrayList(s))); - params.put("len", 3); - params.put("max", 1); + params.put("len", "3"); + params.put("max", "1"); System.out.println(sp.apply(conf, Lists.newArrayList("Framework for general-purpose deduplication"))); } @@ -118,8 +118,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testWordsSuffixPrefix() { - params.put("len", 3); - params.put("max", 4); + params.put("len", "3"); + params.put("max", "4"); final ClusteringFunction sp = new WordsSuffixPrefix(params); @@ -130,7 +130,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testWordsStatsSuffixPrefix() { - params.put("mod", 10); + params.put("mod", "10"); final ClusteringFunction sp = new WordsStatsSuffixPrefixChain(params); @@ -167,7 +167,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testFieldValue() { - params.put("randomLength", 5); + params.put("randomLength", "5"); final ClusteringFunction sp = new SpaceTrimmingFieldValue(params); diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java new file mode 100644 index 0000000000..b0f105d7ce --- /dev/null +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.pace.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import org.junit.jupiter.api.Test; + +public class IncrementalConnectedComponentsTest { + + @Test + public void transitiveClosureTest() { + IncrementalConnectedComponents icc = new IncrementalConnectedComponents(10); + + icc.connect(0, 1); + icc.connect(0, 2); + icc.connect(0, 3); + + icc.connect(1, 2); + icc.connect(1, 4); + icc.connect(1, 5); + + icc.connect(6, 7); + icc.connect(6, 9); + + assertEquals(icc.getConnections(0).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(1).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(2).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(3).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(4).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(5).toString(), "{0, 1, 2, 3, 4, 5}"); + + assertEquals(icc.getConnections(6).toString(), "{6, 7, 9}"); + assertEquals(icc.getConnections(7).toString(), "{6, 7, 9}"); + assertEquals(icc.getConnections(9).toString(), "{6, 7, 9}"); + + assertNull(icc.getConnections(8)); + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 68af3d6994..0af7bb6d01 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -101,6 +101,10 @@ abstract class 
AbstractSparkAction implements Serializable { return SparkSession.builder().config(conf).getOrCreate(); } + protected static SparkSession getSparkWithHiveSession(SparkConf conf) { + return SparkSession.builder().enableHiveSupport().config(conf).getOrCreate(); + } + protected static void save(Dataset dataset, String outPath, SaveMode mode) { dataset.write().option("compression", "gzip").mode(mode).json(outPath); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 60669106a7..d9fb24078e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -2,20 +2,19 @@ package eu.dnetlib.dhp.oa.dedup; import java.lang.reflect.InvocationTargetException; -import java.util.*; -import java.util.stream.Collectors; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; -import org.apache.commons.beanutils.BeanUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; @@ -41,88 +40,91 @@ public class DedupRecordFactory { long ts = System.currentTimeMillis(); // - Dataset> entities = spark + Dataset entities = spark .read() - .textFile(entitiesInputPath) + .schema(Encoders.bean(clazz).schema()) + .json(entitiesInputPath) + .as(Encoders.bean(clazz)) .map( - (MapFunction>) it -> { - T entity = OBJECT_MAPPER.readValue(it, clazz); + (MapFunction>) entity -> { return new Tuple2<>(entity.getId(), entity); }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))); + Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) + .selectExpr("_1 AS id", "_2 AS kryoObject"); // : source is the dedup_id, target is the id of the mergedIn - Dataset> mergeRels = spark + Dataset mergeRels = spark .read() .load(mergeRelsInputPath) - .as(Encoders.bean(Relation.class)) .where("relClass == 'merges'") - .map( - (MapFunction>) r -> new Tuple2<>(r.getSource(), r.getTarget()), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + .selectExpr("source as dedupId", "target as id"); return mergeRels - .joinWith(entities, mergeRels.col("_2").equalTo(entities.col("_1")), "inner") + .join(entities, "id") + .select("dedupId", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) + .groupByKey((MapFunction, String>) Tuple2::_1, Encoders.STRING()) + .reduceGroups( + (ReduceFunction>) (t1, t2) -> new Tuple2<>(t1._1(), + reduceEntity(t1._1(), t1._2(), t2._2(), clazz))) .map( - (MapFunction, Tuple2>, Tuple2>) value -> new Tuple2<>( - value._1()._1(), value._2()._2()), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - .groupByKey( - (MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, T>) (key, - values) -> entityMerger(key, values, ts, dataInfo, clazz), + (MapFunction>, T>) t 
-> { + T res = t._2()._2(); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + }, Encoders.bean(clazz)); } + public static T reduceEntity( + String id, T entity, T duplicate, Class clazz) { + + int compare = new IdentifierComparator() + .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); + + if (compare > 0) { + T swap = duplicate; + duplicate = entity; + entity = swap; + } + + entity.mergeFrom(duplicate); + entity.setId(id); + + if (ModelSupport.isSubClass(duplicate, Result.class)) { + Result re = (Result) entity; + Result rd = (Result) duplicate; + + List> authors = new ArrayList<>(); + if (re.getAuthor() != null) { + authors.add(re.getAuthor()); + } + if (rd.getAuthor() != null) { + authors.add(rd.getAuthor()); + } + + re.setAuthor(AuthorMerger.merge(authors)); + } + + return entity; + } + public static T entityMerger( String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) throws IllegalAccessException, InstantiationException, InvocationTargetException { + T base = entities.next()._2(); - final Comparator> idComparator = new IdentifierComparator<>(); - - final LinkedList entityList = Lists - .newArrayList(entities) - .stream() - .map(t -> Identifier.newInstance(t._2())) - .sorted(idComparator) - .map(Identifier::getEntity) - .collect(Collectors.toCollection(LinkedList::new)); - - final T entity = clazz.newInstance(); - final T first = entityList.removeFirst(); - - BeanUtils.copyProperties(entity, first); - - final List> authors = Lists.newArrayList(); - - entityList - .forEach( - duplicate -> { - entity.mergeFrom(duplicate); - if (ModelSupport.isSubClass(duplicate, Result.class)) { - Result r1 = (Result) duplicate; - Optional - .ofNullable(r1.getAuthor()) - .ifPresent(a -> authors.add(a)); - } - }); - - // set authors and date - if (ModelSupport.isSubClass(entity, Result.class)) { - Optional - .ofNullable(((Result) entity).getAuthor()) - .ifPresent(a -> authors.add(a)); - - ((Result) entity).setAuthor(AuthorMerger.merge(authors)); + while (entities.hasNext()) { + T duplicate = entities.next()._2(); + if (duplicate != null) + base = reduceEntity(id, base, duplicate, clazz); } - entity.setId(id); + base.setDataInfo(dataInfo); + base.setLastupdatetimestamp(ts); - entity.setLastupdatetimestamp(ts); - entity.setDataInfo(dataInfo); - - return entity; + return base; } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index 7e0d660622..37e1bfd155 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.oa.dedup; +import static eu.dnetlib.dhp.utils.DHPUtils.md5; import static org.apache.commons.lang3.StringUtils.substringAfter; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -14,33 +15,36 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) - public static String generate(List> pids, String defaultID) { + public static String generate(List pids, String defaultID) { if (pids == null || pids.isEmpty()) return defaultID; return generateId(pids); } - private static String generateId(List> pids) { - Identifier bp = pids + private static String generateId(List pids) { + Identifier bp = 
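The DedupRecordFactory change above also moves from materialising and sorting whole duplicate groups to a pairwise reduce that holds at most two records at a time. A toy, self-contained illustration of the groupByKey/reduceGroups pattern (the data and the sum-as-merge are invented for the example):

import java.util.Arrays;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public class ReduceGroupsDemo {
	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().master("local").appName("demo").getOrCreate();

		Dataset<Tuple2<String, Integer>> pairs = spark
			.createDataset(
				Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("a", 2), new Tuple2<>("b", 5)),
				Encoders.tuple(Encoders.STRING(), Encoders.INT()));

		// groups are folded two elements at a time instead of being collected first
		pairs
			.groupByKey((MapFunction<Tuple2<String, Integer>, String>) Tuple2::_1, Encoders.STRING())
			.reduceGroups(
				(ReduceFunction<Tuple2<String, Integer>>) (t1, t2) -> new Tuple2<>(t1._1(), t1._2() + t2._2()))
			.show(); // two rows: (a, (a, 3)) and (b, (b, 5))
	}
}

reduceEntity plays the role of the reducer here: given two records, it swaps them according to IdentifierComparator so that the better one absorbs the other.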
pids .stream() .min(Identifier::compareTo) .orElseThrow(() -> new IllegalStateException("unable to generate id")); - String prefix = substringBefore(bp.getOriginalID(), "|"); - String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); - String suffix = substringAfter(bp.getOriginalID(), "::"); + return generate(bp.getOriginalID()); + } + + public static String generate(String originalId) { + String prefix = substringBefore(originalId, "|"); + String ns = substringBefore(substringAfter(originalId, "|"), "::"); + String suffix = substringAfter(originalId, "::"); final String pidType = substringBefore(ns, "_"); if (PidType.isValid(pidType)) { return prefix + "|" + dedupify(ns) + "::" + suffix; } else { - return prefix + "|dedup_wf_001::" + suffix; + return prefix + "|dedup_wf_001::" + md5(originalId); // hash the whole originalId to avoid collisions } } private static String dedupify(String ns) { - StringBuilder prefix; if (PidType.valueOf(substringBefore(ns, "_")) == PidType.openorgs) { prefix = new StringBuilder(substringBefore(ns, "_")); @@ -53,5 +57,4 @@ public class IdGenerator implements Serializable { } return prefix.substring(0, 12); } - } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index babbaaabd1..5bb132b899 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -1,51 +1,47 @@ package eu.dnetlib.dhp.oa.dedup; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; - -import java.io.IOException; -import java.util.*; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; -import org.apache.spark.graphx.Edge; -import org.apache.spark.rdd.RDD; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.dom4j.DocumentException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; - -import com.google.common.collect.Lists; import com.google.common.hash.Hashing; - +import com.kwartile.lib.cc.ConnectedComponent; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.dedup.graph.ConnectedComponent; -import eu.dnetlib.dhp.oa.dedup.graph.GraphProcessor; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import 
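One detail worth calling out in the IdGenerator change: when the pid type is not valid, the suffix is now md5(originalId) instead of the original suffix, so two records whose ids share a suffix but differ in namespace can no longer collide on the generated dedup id. A self-contained illustration (the md5 helper is assumed to be the usual hex-encoded digest, as DHPUtils.md5 is expected to provide; the ids are invented):

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class DedupIdFallbackDemo {

	// hex-encoded MD5, assumed equivalent to DHPUtils.md5
	static String md5(String s) throws Exception {
		MessageDigest md = MessageDigest.getInstance("MD5");
		StringBuilder sb = new StringBuilder();
		for (byte b : md.digest(s.getBytes(StandardCharsets.UTF_8))) {
			sb.append(String.format("%02x", b));
		}
		return sb.toString();
	}

	public static void main(String[] args) throws Exception {
		// same suffix, different namespaces: the fallback ids now differ
		System.out.println("50|dedup_wf_001::" + md5("50|od______1234::aabbcc"));
		System.out.println("50|dedup_wf_001::" + md5("50|od______9999::aabbcc"));
	}
}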
eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.*; +import org.apache.spark.sql.catalyst.encoders.RowEncoder; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.expressions.Window; +import org.apache.spark.sql.expressions.WindowSpec; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.dom4j.DocumentException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; +import scala.Tuple3; +import scala.collection.JavaConversions; + +import java.io.IOException; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; + +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; +import static org.apache.spark.sql.functions.*; public class SparkCreateMergeRels extends AbstractSparkAction { @@ -68,10 +64,12 @@ public class SparkCreateMergeRels extends AbstractSparkAction { log.info("isLookupUrl {}", isLookUpUrl); SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", parser.get("hiveMetastoreUris")); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - new SparkCreateMergeRels(parser, getSparkSession(conf)) + new SparkCreateMergeRels(parser, getSparkWithHiveSession(conf)) .run(ISLookupClientFactory.getLookUpService(isLookUpUrl)); } @@ -87,14 +85,15 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .ofNullable(parser.get("cutConnectedComponent")) .map(Integer::valueOf) .orElse(0); + + final String pivotHistoryDatabase = parser.get("pivotHistoryDatabase"); + log.info("connected component cut: '{}'", cut); log.info("graphBasePath: '{}'", graphBasePath); log.info("isLookUpUrl: '{}'", isLookUpUrl); log.info("actionSetId: '{}'", actionSetId); log.info("workingPath: '{}'", workingPath); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { final String subEntity = dedupConf.getWf().getSubEntityValue(); final Class clazz = ModelSupport.entityTypes.get(EntityType.valueOf(subEntity)); @@ -106,113 +105,170 @@ public class SparkCreateMergeRels extends AbstractSparkAction { final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, subEntity); - // - JavaPairRDD vertexes = createVertexes(sc, graphBasePath, subEntity, dedupConf); - - final RDD> edgeRdd = spark + final Dataset simRels = spark .read() .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) - .as(Encoders.bean(Relation.class)) - .javaRDD() - .map(it -> new Edge<>(hash(it.getSource()), hash(it.getTarget()), it.getRelClass())) - .rdd(); + .select("source", "target"); - Dataset> rawMergeRels = spark - .createDataset( - GraphProcessor - .findCCs(vertexes.rdd(), edgeRdd, maxIterations, cut) - .toJavaRDD() - .filter(k -> k.getIds().size() > 1) - .flatMap(this::ccToRels) - .rdd(), - 
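Pulling together the session fragments of this change, the Hive-enabled setup amounts to the sketch below; the metastore URI is a placeholder, as the real value arrives through the hiveMetastoreUris workflow parameter:

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.schema.common.ModelSupport;

public class HiveSessionSketch {

	public static SparkSession build(String hiveMetastoreUris) {
		SparkConf conf = new SparkConf();
		conf.set("hive.metastore.uris", hiveMetastoreUris); // e.g. "thrift://example-host:9083" (placeholder)
		conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
		conf.registerKryoClasses(ModelSupport.getOafModelClasses());

		// enableHiveSupport() is what lets SparkCreateMergeRels read the pivot history table by name
		return SparkSession.builder().enableHiveSupport().config(conf).getOrCreate();
	}
}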
Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + UserDefinedFunction hashUDF = functions + .udf( + (String s) -> hash(s), DataTypes.LongType); - Dataset> entities = spark + // + Dataset vertexIdMap = simRels + .selectExpr("source as id") + .union(simRels.selectExpr("target as id")) + .distinct() + .withColumn("vertexId", hashUDF.apply(functions.col("id"))); + + final Dataset edges = spark .read() - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .map( - (MapFunction>) it -> { - OafEntity entity = OBJECT_MAPPER.readValue(it, clazz); - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))); + .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) + .select("source", "target") + .withColumn("source", hashUDF.apply(functions.col("source"))) + .withColumn("target", hashUDF.apply(functions.col("target"))); - Dataset mergeRels = rawMergeRels - .joinWith(entities, rawMergeRels.col("_2").equalTo(entities.col("_1")), "inner") - // , - .map( - (MapFunction, Tuple2>, Tuple2>) value -> new Tuple2<>( - value._1()._1(), value._2()._2()), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - // - .groupByKey( - (MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, ConnectedComponent>) this::generateID, - Encoders.bean(ConnectedComponent.class)) - // + Dataset cliques = ConnectedComponent + .runOnPairs(edges, 50, spark); + + Dataset rawMergeRels = cliques + .join(vertexIdMap, JavaConversions.asScalaBuffer(Collections.singletonList("vertexId")), "inner") + .drop("vertexId") + .distinct(); + + Dataset pivotHistory = spark + .createDataset( + Collections.emptyList(), + RowEncoder + .apply(StructType.fromDDL("id STRING, firstUsage STRING, lastUsage STRING, dedupId STRING"))); + + if (StringUtils.isNotBlank(pivotHistoryDatabase)) { + pivotHistory = spark + .read() + .table(pivotHistoryDatabase + "." 
+ subEntity) + .selectExpr("id", "lastUsage", "dedupId"); + } + + String collectedfromExpr = "false AS collectedfrom"; + String dateExpr = "'' AS date"; + + if (Result.class.isAssignableFrom(clazz)) { + if (Publication.class.isAssignableFrom(clazz)) { + collectedfromExpr = "array_contains(collectedfrom.key, '" + ModelConstants.CROSSREF_ID + + "') AS collectedfrom"; + } else if (eu.dnetlib.dhp.schema.oaf.Dataset.class.isAssignableFrom(clazz)) { + collectedfromExpr = "array_contains(collectedfrom.key, '" + ModelConstants.DATACITE_ID + + "') AS collectedfrom"; + } + + dateExpr = "dateofacceptance.value AS date"; + } + + UserDefinedFunction mapPid = udf( + (String s) -> Math.min(PidType.tryValueOf(s).ordinal(), PidType.w3id.ordinal()), DataTypes.IntegerType); + UserDefinedFunction validDate = udf((String date) -> { + if (StringUtils.isNotBlank(date) + && date.matches(DatePicker.DATE_PATTERN) && DatePicker.inRange(date)) { + return date; + } + return LocalDate.now().plusWeeks(1).toString(); + }, DataTypes.StringType); + + Dataset pivotingData = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .selectExpr( + "id", + "regexp_extract(id, '^\\\\d+\\\\|([^_]+).*::', 1) AS pidType", + collectedfromExpr, + dateExpr) + .withColumn("pidType", mapPid.apply(col("pidType"))) // ordinal of pid type + .withColumn("date", validDate.apply(col("date"))); + + UserDefinedFunction generateDedupId = udf((String s) -> IdGenerator.generate(s), DataTypes.StringType); + + // ordering to selected pivot id + WindowSpec w = Window + .partitionBy("groupId") + .orderBy( + col("lastUsage").desc_nulls_last(), + col("pidType").asc_nulls_last(), + col("collectedfrom").desc_nulls_last(), + col("date").asc_nulls_last(), + col("id").asc_nulls_last()); + + Dataset output = rawMergeRels + .join(pivotHistory, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "full") + .join(pivotingData, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .withColumn("pivot", functions.first("id").over(w)) + .withColumn("pivotDedupId", functions.first("dedupId").over(w)) + .withColumn("position", functions.row_number().over(w)) + .filter(cut > 0 ? 
col("position").lt(lit(cut)) : lit(true)) + // .select("id", "groupId", "collectedfrom", "pivot", "dedupId", "pivotDedupId") + // .distinct() .flatMap( - (FlatMapFunction) cc -> ccToMergeRel(cc, dedupConf), - Encoders.bean(Relation.class)); + (FlatMapFunction>) (Row r) -> { + String id = r.getAs("id"); + String pivot = r.getAs("pivot"); + String pivotDedupId = r.getAs("pivotDedupId"); // dedupId associated with the pivot + String dedupId = r.getAs("dedupId"); // dedupId associated with this id if it was a pivot - saveParquet(mergeRels, mergeRelPath, SaveMode.Overwrite); + // filter out id == pivotDedupId + // those are caused by claim expressed on pivotDedupId + // information will be merged after creating deduprecord + if (id.equals(pivotDedupId)) { + return Collections.emptyIterator(); + } + ArrayList> res = new ArrayList<>(); + + // singleton pivots have null groupId as they do not match rawMergeRels + if (r.isNullAt(r.fieldIndex("groupId"))) { + // the record is existing if it matches pivotingData + if (!r.isNullAt(r.fieldIndex("collectedfrom"))) { + // create relation with old dedup id + res.add(new Tuple3<>(id, dedupId, null)); + } + return res.iterator(); + } + + // new pivot, assign pivotDedupId with current IdGenerator + if (StringUtils.isBlank(pivotDedupId)) { + pivotDedupId = IdGenerator.generate(pivot); + } + + // this was a pivot in a preceding graph but it has been merged into a new group with different + // pivot + if (StringUtils.isNotBlank(dedupId) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { + // materialize the previous dedup record as a merge relation with the new one + res.add(new Tuple3<>(dedupId, pivotDedupId, null)); + } + + // add merge relations + res.add(new Tuple3<>(id, pivotDedupId, pivot)); + + return res.iterator(); + }, Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.STRING())) + .distinct() + .flatMap( + (FlatMapFunction, Relation>) (Tuple3 r) -> { + String id = r._1(); + String dedupId = r._2(); + String pivot = r._3(); + + ArrayList res = new ArrayList<>(); + res.add(rel(pivot, dedupId, id, ModelConstants.MERGES, dedupConf)); + res.add(rel(pivot, id, dedupId, ModelConstants.IS_MERGED_IN, dedupConf)); + + return res.iterator(); + }, Encoders.bean(Relation.class)); + + saveParquet(output, mergeRelPath, SaveMode.Overwrite); } } - private ConnectedComponent generateID(String key, Iterator> values) { - - List> identifiers = Lists - .newArrayList(values) - .stream() - .map(v -> Identifier.newInstance(v._2())) - .collect(Collectors.toList()); - - String rootID = IdGenerator.generate(identifiers, key); - - if (Objects.equals(rootID, key)) - throw new IllegalStateException("generated default ID: " + rootID); - - return new ConnectedComponent(rootID, - identifiers.stream().map(i -> i.getEntity().getId()).collect(Collectors.toSet())); - } - - private JavaPairRDD createVertexes(JavaSparkContext sc, String graphBasePath, String subEntity, - DedupConfig dedupConf) { - - return sc - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .mapToPair(json -> { - String id = MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), json); - return new Tuple2<>(hash(id), id); - }); - } - - private Iterator> ccToRels(ConnectedComponent cc) { - return cc - .getIds() - .stream() - .map(id -> new Tuple2<>(cc.getCcId(), id)) - .iterator(); - } - - private Iterator ccToMergeRel(ConnectedComponent cc, DedupConfig dedupConf) { - return cc - .getIds() - .stream() - .flatMap( - id -> { - List tmp = new ArrayList<>(); - - 
tmp.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf)); - tmp.add(rel(id, cc.getCcId(), ModelConstants.IS_MERGED_IN, dedupConf)); - - return tmp.stream(); - }) - .iterator(); - } - - private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) { + private static Relation rel(String pivot, String source, String target, String relClass, DedupConfig dedupConf) { String entityType = dedupConf.getWf().getEntityType(); @@ -238,6 +294,14 @@ public class SparkCreateMergeRels extends AbstractSparkAction { // TODO calculate the trust value based on the similarity score of the elements in the CC r.setDataInfo(info); + + if (pivot != null) { + KeyValue pivotKV = new KeyValue(); + pivotKV.setKey("pivot"); + pivotKV.setValue(pivot); + + r.setProperties(Arrays.asList(pivotKV)); + } return r; } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index 65ad0c3278..60752a4574 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -91,18 +91,12 @@ public class SparkWhitelistSimRels extends AbstractSparkAction { Dataset entities = spark .read() .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .repartition(numPartitions) - .withColumn("id", functions.get_json_object(new Column("value"), dedupConf.getWf().getIdPath())); + .select(functions.get_json_object(new Column("value"), dedupConf.getWf().getIdPath()).as("id")) + .distinct(); - Dataset whiteListRels1 = whiteListRels - .join(entities, entities.col("id").equalTo(whiteListRels.col("from")), "inner") - .select("from", "to"); - - Dataset whiteListRels2 = whiteListRels1 - .join(entities, whiteListRels1.col("to").equalTo(entities.col("id")), "inner") - .select("from", "to"); - - Dataset whiteListSimRels = whiteListRels2 + Dataset whiteListSimRels = whiteListRels + .join(entities, entities.col("id").equalTo(whiteListRels.col("from")), "leftsemi") + .join(entities, functions.col("to").equalTo(entities.col("id")), "leftsemi") .map( (MapFunction) r -> DedupUtility .createSimRel(r.getString(0), r.getString(1), entity), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java deleted file mode 100644 index 4a39a175d4..0000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ /dev/null @@ -1,100 +0,0 @@ - -package eu.dnetlib.dhp.oa.dedup.graph; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.commons.lang3.StringUtils; -import org.codehaus.jackson.annotate.JsonIgnore; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.util.PaceException; - -public class ConnectedComponent implements Serializable { - - private String ccId; - private Set ids; - - private static final String CONNECTED_COMPONENT_ID_PREFIX = "connect_comp"; - - public ConnectedComponent(Set ids, final int cut) { - this.ids = ids; - - this.ccId = createDefaultID(); - - if (cut > 0 && ids.size() > cut) { - this.ids = ids - .stream() - .filter(id -> 
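The SparkWhitelistSimRels rewrite above swaps two inner join/select rounds for chained leftsemi joins: a leftsemi join only filters the left side and never duplicates a row when the right side matches more than once. A toy run of the semantics (tables and values invented; src/dst stand in for the from/to columns):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;

public class LeftSemiDemo {
	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().master("local").appName("demo").getOrCreate();

		Dataset<Row> rels = spark.sql("SELECT * FROM VALUES ('a', 'b'), ('a', 'x') AS t(src, dst)");
		Dataset<Row> ids = spark.sql("SELECT * FROM VALUES ('a'), ('b') AS t(id)");

		Dataset<Row> kept = rels
			.join(ids, rels.col("src").equalTo(ids.col("id")), "leftsemi") // src must be a known id
			.join(ids, functions.col("dst").equalTo(ids.col("id")), "leftsemi"); // dst must be a known id

		kept.show(); // only ('a', 'b') survives; ('a', 'x') is dropped because 'x' is unknown
	}
}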
!ccId.equalsIgnoreCase(id)) - .limit(cut - 1) - .collect(Collectors.toSet()); -// this.ids.add(ccId); ?? - } - } - - public ConnectedComponent(String ccId, Set ids) { - this.ccId = ccId; - this.ids = ids; - } - - public String createDefaultID() { - if (ids.size() > 1) { - final String s = getMin(); - String prefix = s.split("\\|")[0]; - ccId = prefix + "|" + CONNECTED_COMPONENT_ID_PREFIX + "::" + DHPUtils.md5(s); - return ccId; - } else { - return ids.iterator().next(); - } - } - - @JsonIgnore - public String getMin() { - - final StringBuilder min = new StringBuilder(); - - ids - .forEach( - id -> { - if (StringUtils.isBlank(min.toString())) { - min.append(id); - } else { - if (min.toString().compareTo(id) > 0) { - min.setLength(0); - min.append(id); - } - } - }); - return min.toString(); - } - - @Override - public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (IOException e) { - throw new PaceException("Failed to create Json: ", e); - } - } - - public Set getIds() { - return ids; - } - - public void setIds(Set ids) { - this.ids = ids; - } - - public String getCcId() { - return ccId; - } - - public void setCcId(String ccId) { - this.ccId = ccId; - } -} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala deleted file mode 100644 index f4dd85d758..0000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala +++ /dev/null @@ -1,37 +0,0 @@ -package eu.dnetlib.dhp.oa.dedup.graph - -import org.apache.spark.graphx._ -import org.apache.spark.rdd.RDD - -import scala.collection.JavaConversions; - -object GraphProcessor { - - def findCCs(vertexes: RDD[(VertexId, String)], edges: RDD[Edge[String]], maxIterations: Int, cut:Int): RDD[ConnectedComponent] = { - val graph: Graph[String, String] = Graph(vertexes, edges).partitionBy(PartitionStrategy.RandomVertexCut) //TODO remember to remove partitionby - val cc = graph.connectedComponents(maxIterations).vertices - - val joinResult = vertexes.leftOuterJoin(cc).map { - case (id, (openaireId, cc)) => { - if (cc.isEmpty) { - (id, openaireId) - } - else { - (cc.get, openaireId) - } - } - } - val connectedComponents = joinResult.groupByKey() - .map[ConnectedComponent](cc => asConnectedComponent(cc, cut)) - connectedComponents - } - - - - def asConnectedComponent(group: (VertexId, Iterable[String]), cut:Int): ConnectedComponent = { - val docs = group._2.toSet[String] - val connectedComponent = new ConnectedComponent(JavaConversions.setAsJavaSet[String](docs), cut); - connectedComponent - } - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index 0cba4fc3ba..e03c3bf95e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -3,21 +3,21 @@ package eu.dnetlib.dhp.oa.dedup.model; import java.io.Serializable; import java.text.SimpleDateFormat; -import java.util.*; -import java.util.stream.Collectors; +import java.time.LocalDate; +import java.util.Date; +import java.util.List; +import java.util.Objects; import org.apache.commons.lang3.StringUtils; -import 
com.google.common.collect.Sets; - import eu.dnetlib.dhp.oa.dedup.DatePicker; import eu.dnetlib.dhp.oa.dedup.IdentifierComparator; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; -import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class Identifier implements Serializable, Comparable> { @@ -50,7 +50,7 @@ public class Identifier implements Serializable, Comparable if (Objects.nonNull(date)) { return date; } else { - String sDate = BASE_DATE; + String sDate = LocalDate.now().plusDays(1).toString(); if (ModelSupport.isSubClass(getEntity(), Result.class)) { Result result = (Result) getEntity(); if (isWellformed(result.getDateofacceptance())) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json index b1df08535e..4f9f4b0b52 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json @@ -28,5 +28,17 @@ "paramLongName": "workingPath", "paramDescription": "path for the working directory", "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "pivotHistoryDatabase", + "paramDescription": "Pivot history database", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml index 2e0ed9aeea..cd29965e3d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml @@ -15,4 +15,8 @@ oozie.action.sharelib.for.spark spark2 + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml index ba2270c8a8..49a331def9 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml @@ -188,6 +188,8 @@ --isLookUpUrl${isLookUpUrl} --actionSetId${actionSetId} --cutConnectedComponent${cutConnectedComponent} + --hiveMetastoreUris${hiveMetastoreUris} + --pivotHistoryDatabase${pivotHistoryDatabase} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala b/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala new file mode 
100644
index 0000000000..4c33622354
--- /dev/null
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala
@@ -0,0 +1,335 @@
+/** Copyright (c) 2017 Kwartile, Inc., http://www.kwartile.com
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/** Map-reduce implementation of Connected Component
+ * Given lists of subgraphs, returns all the nodes that are connected.
+ */
+
+package com.kwartile.lib.cc
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.storage.StorageLevel
+
+import scala.annotation.tailrec
+import scala.collection.mutable
+
+object ConnectedComponent extends Serializable {
+
+  /** Applies Small Star operation on RDD of nodePairs
+   *
+   * @param nodePairs on which to apply Small Star operations
+   * @return new nodePairs after the operation and connectivity change count
+   */
+  private def smallStar(nodePairs: RDD[(Long, Long)]): (RDD[(Long, Long)], Long) = {
+
+    /** generate RDD of (self, List(neighbors)) where self > neighbors
+     * E.g.: nodePairs (1, 4), (6, 1), (3, 2), (6, 5)
+     * will result into (4, List(1)), (6, List(1)), (3, List(2)), (6, List(5))
+     */
+    val neighbors = nodePairs.map(x => {
+      val (self, neighbor) = (x._1, x._2)
+      if (self > neighbor)
+        (self, neighbor)
+      else
+        (neighbor, self)
+    })
+
+    /** reduce on self to get list of all its neighbors.
+     * E.g.: (4, List(1)), (6, List(1)), (3, List(2)), (6, List(5))
+     * will result into (4, List(1)), (6, List(1, 5)), (3, List(2))
+     * Note:
+     * (1) you may need to tweak number of partitions.
+     * (2) also, watch out for data skew. In that case, consider using rangePartitioner
+     */
+    val empty = mutable.HashSet[Long]()
+    val allNeighbors = neighbors.aggregateByKey(empty)(
+      (lb, v) => lb += v,
+      (lb1, lb2) => lb1 ++ lb2
+    )
+
+    /** Apply Small Star operation on (self, List(neighbor)) to get newNodePairs and count the change in connectivity
+     */
+
+    val newNodePairsWithChangeCount = allNeighbors
+      .map(x => {
+        val self = x._1
+        val neighbors = x._2.toList
+        val minNode = argMin(self :: neighbors)
+        val newNodePairs = (self :: neighbors)
+          .map(neighbor => {
+            (neighbor, minNode)
+          })
+          .filter(x => {
+            val neighbor = x._1
+            val minNode = x._2
+            (neighbor <= self && neighbor != minNode) || (self == neighbor)
+          })
+        val uniqueNewNodePairs = newNodePairs.toSet.toList
+
+        /** We count the change by taking a diff of the new node pairs with the old node pairs
+         */
+        val connectivityChangeCount = (uniqueNewNodePairs diff neighbors.map((self, _))).length
+        (uniqueNewNodePairs, connectivityChangeCount)
+      })
+      .persist(StorageLevel.MEMORY_AND_DISK_SER)
+
+    /** Sum all the changeCounts
+     */
+    val totalConnectivityCountChange = newNodePairsWithChangeCount
+      .mapPartitions(iter => {
+        val (v, l) = iter.toSeq.unzip
+        val sum = l.sum
+        Iterator(sum)
+      })
+      .sum
+      .toLong
+
+    val newNodePairs = newNodePairsWithChangeCount.map(x => x._1).flatMap(x => x)
+    newNodePairsWithChangeCount.unpersist(false)
+    (newNodePairs, totalConnectivityCountChange)
+  }
+
+  /** Apply Large Star operation on a RDD of nodePairs
+   *
+   * @param nodePairs on which to apply Large Star operations
+   * @return new nodePairs after the operation and connectivity change count
+   */
+  private def largeStar(nodePairs: RDD[(Long, Long)]): (RDD[(Long, Long)], Long) = {
+
+    /** generate RDD of (self, List(neighbors))
+     * E.g.: nodePairs (1, 4), (6, 1), (3, 2), (6, 5)
+     * will result into (4, List(1)), (1, List(4)), (6, List(1)), (1, List(6)), (3, List(2)), (2, List(3)), (6, List(5)), (5, List(6))
+     */
+
+    val neighbors = nodePairs.flatMap(x => {
+      val (self, neighbor) = (x._1, x._2)
+      if (self == neighbor)
+        List((self, neighbor))
+      else
+        List((self, neighbor), (neighbor, self))
+    })
+
+    /** reduce on self to get list of all its neighbors.
+     * E.g.: (4, List(1)), (1, List(4)), (6, List(1)), (1, List(6)), (3, List(2)), (2, List(3)), (6, List(5)), (5, List(6))
+     * will result into (4, List(1)), (1, List(4, 6)), (6, List(1, 5)), (3, List(2)), (2, List(3)), (5, List(6))
+     * Note:
+     * (1) you may need to tweak number of partitions.
+     * (2) also, watch out for data skew.
In that case, consider using rangePartitioner + */ + + val localAdd = (s: mutable.HashSet[Long], v: Long) => s += v + val partitionAdd = (s1: mutable.HashSet[Long], s2: mutable.HashSet[Long]) => s1 ++= s2 + val allNeighbors = + neighbors.aggregateByKey(mutable.HashSet.empty[Long] /*, rangePartitioner*/ )(localAdd, partitionAdd) + + /** Apply Large Star operation on (self, List(neighbor)) to get newNodePairs and count the change in connectivity + */ + + val newNodePairsWithChangeCount = allNeighbors + .map(x => { + val self = x._1 + val neighbors = x._2.toList + val minNode = argMin(self :: neighbors) + val newNodePairs = (self :: neighbors) + .map(neighbor => { + (neighbor, minNode) + }) + .filter(x => { + val neighbor = x._1 + val minNode = x._2 + neighbor > self || neighbor == minNode + }) + + val uniqueNewNodePairs = newNodePairs.toSet.toList + val connectivityChangeCount = (uniqueNewNodePairs diff neighbors.map((self, _))).length + (uniqueNewNodePairs, connectivityChangeCount) + }) + .persist(StorageLevel.MEMORY_AND_DISK_SER) + + val totalConnectivityCountChange = newNodePairsWithChangeCount + .mapPartitions(iter => { + val (v, l) = iter.toSeq.unzip + val sum = l.sum + Iterator(sum) + }) + .sum + .toLong + + /** Sum all the changeCounts + */ + val newNodePairs = newNodePairsWithChangeCount.map(x => x._1).flatMap(x => x) + newNodePairsWithChangeCount.unpersist(false) + (newNodePairs, totalConnectivityCountChange) + } + + private def argMin(nodes: List[Long]): Long = { + nodes.min(Ordering.by((node: Long) => node)) + } + + /** Build nodePairs given a list of nodes. A list of nodes represents a subgraph. + * + * @param nodes that are part of a subgraph + * @return nodePairs for a subgraph + */ + private def buildPairs(nodes: List[Long]): List[(Long, Long)] = { + buildPairs(nodes.head, nodes.tail, null.asInstanceOf[List[(Long, Long)]]) + } + + @tailrec + private def buildPairs(node: Long, neighbors: List[Long], partialPairs: List[(Long, Long)]): List[(Long, Long)] = { + if (neighbors.isEmpty) { + if (partialPairs != null) + List((node, node)) ::: partialPairs + else + List((node, node)) + } else if (neighbors.length == 1) { + val neighbor = neighbors(0) + if (node > neighbor) + if (partialPairs != null) List((node, neighbor)) ::: partialPairs else List((node, neighbor)) + else if (partialPairs != null) List((neighbor, node)) ::: partialPairs + else List((neighbor, node)) + } else { + val newPartialPairs = neighbors + .map(neighbor => { + if (node > neighbor) + List((node, neighbor)) + else + List((neighbor, node)) + }) + .flatMap(x => x) + + if (partialPairs != null) + buildPairs(neighbors.head, neighbors.tail, newPartialPairs ::: partialPairs) + else + buildPairs(neighbors.head, neighbors.tail, newPartialPairs) + } + } + + /** Implements alternatingAlgo. 
Converges when the changeCount is either 0 or does not change from the previous iteration
+   *
+   * @param nodePairs for a graph
+   * @param largeStarConnectivityChangeCount change count that resulted from the previous iteration
+   * @param smallStarConnectivityChangeCount change count that resulted from the previous iteration
+   * @param didConverge flag to indicate the algorithm converged
+   * @param currIterationCount counter to capture number of iterations
+   * @param maxIterationCount maximum number of iterations to try before giving up
+   * @return RDD of nodePairs
+   */
+
+  @tailrec
+  private def alternatingAlgo(
+    nodePairs: RDD[(Long, Long)],
+    largeStarConnectivityChangeCount: Long,
+    smallStarConnectivityChangeCount: Long,
+    didConverge: Boolean,
+    currIterationCount: Int,
+    maxIterationCount: Int
+  ): (RDD[(Long, Long)], Boolean, Long) = {
+
+    val iterationCount = currIterationCount + 1
+    if (didConverge)
+      (nodePairs, true, currIterationCount)
+    else if (currIterationCount >= maxIterationCount) {
+      (nodePairs, false, currIterationCount)
+    } else {
+
+      val (nodePairsLargeStar, currLargeStarConnectivityChangeCount) = largeStar(nodePairs)
+      val (nodePairsSmallStar, currSmallStarConnectivityChangeCount) = smallStar(nodePairsLargeStar)
+
+      if (
+        (currLargeStarConnectivityChangeCount == largeStarConnectivityChangeCount &&
+          currSmallStarConnectivityChangeCount == smallStarConnectivityChangeCount) ||
+        (currSmallStarConnectivityChangeCount == 0 && currLargeStarConnectivityChangeCount == 0)
+      ) {
+        alternatingAlgo(
+          nodePairsSmallStar,
+          currLargeStarConnectivityChangeCount,
+          currSmallStarConnectivityChangeCount,
+          true,
+          iterationCount,
+          maxIterationCount
+        )
+      } else {
+        alternatingAlgo(
+          nodePairsSmallStar,
+          currLargeStarConnectivityChangeCount,
+          currSmallStarConnectivityChangeCount,
+          false,
+          iterationCount,
+          maxIterationCount
+        )
+      }
+    }
+  }
+
+  /** Driver function
+   *
+   * @param cliques list of nodes representing subgraphs (or cliques)
+   * @param maxIterationCount maximum number of iterations to try before giving up
+   * @return Connected Components as nodePairs where second member of the nodePair is the minimum node in the component
+   */
+  def run(cliques: RDD[List[Long]], maxIterationCount: Int): (RDD[(Long, Long)], Boolean, Long) = {
+
+    val nodePairs = cliques
+      .map(aClique => {
+        buildPairs(aClique)
+      })
+      .flatMap(x => x)
+
+    val (cc, didConverge, iterCount) = alternatingAlgo(nodePairs, 9999999L, 9999999L, false, 0, maxIterationCount)
+
+    if (didConverge) {
+      (cc, didConverge, iterCount)
+    } else {
+      (null.asInstanceOf[RDD[(Long, Long)]], didConverge, iterCount)
+    }
+  }
+
+  def runOnPairs(nodePairs: RDD[(Long, Long)], maxIterationCount: Int): (RDD[(Long, Long)], Boolean, Long) = {
+    val (cc, didConverge, iterCount) = alternatingAlgo(nodePairs, 9999999L, 9999999L, false, 0, maxIterationCount)
+
+    if (didConverge) {
+      (cc, didConverge, iterCount)
+    } else {
+      (null.asInstanceOf[RDD[(Long, Long)]], didConverge, iterCount)
+    }
+  }
+
+  def runOnPairs(nodePairs: Dataset[Row], maxIterationCount: Int)(implicit spark: SparkSession): Dataset[Row] = {
+    import spark.implicits._
+
+    val (cc, didConverge, iterCount) = alternatingAlgo(
+      nodePairs.map(e => (e.getLong(0), e.getLong(1))).rdd,
+      9999999L,
+      9999999L,
+      false,
+      0,
+      maxIterationCount
+    )
+
+    if (didConverge) {
+      cc.toDF("vertexId", "groupId")
+    } else {
+      null.asInstanceOf[Dataset[Row]]
+    }
+  }
+
+}
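For intuition, a single Small Star round can be replayed on a plain in-memory edge list. The following is a hypothetical single-machine analogue of the RDD version above, not code from the patch:

import java.util.*;
import java.util.AbstractMap.SimpleImmutableEntry;

public class SmallStarDemo {

	// one Small Star round: each node links its smaller neighbours to the
	// minimum of its neighbourhood, pulling chains toward the component minimum
	static Set<Map.Entry<Long, Long>> smallStarRound(List<long[]> edges) {
		Map<Long, Set<Long>> neighbours = new HashMap<>();
		for (long[] e : edges) {
			long hi = Math.max(e[0], e[1]), lo = Math.min(e[0], e[1]);
			neighbours.computeIfAbsent(hi, k -> new HashSet<>()).add(lo);
		}
		Set<Map.Entry<Long, Long>> out = new HashSet<>();
		for (Map.Entry<Long, Set<Long>> e : neighbours.entrySet()) {
			long self = e.getKey();
			long min = Math.min(self, Collections.min(e.getValue()));
			out.add(new SimpleImmutableEntry<>(self, min));
			for (long n : e.getValue()) {
				if (n != min) {
					out.add(new SimpleImmutableEntry<>(n, min));
				}
			}
		}
		return out;
	}

	public static void main(String[] args) {
		List<long[]> edges = Arrays.asList(
			new long[] { 1, 4 }, new long[] { 6, 1 }, new long[] { 3, 2 }, new long[] { 6, 5 });
		// emits (4,1), (6,1), (5,1), (3,2): component minima start taking over
		smallStarRound(edges).forEach(p -> System.out.println(p.getKey() + " -> " + p.getValue()));
	}
}

Alternating this with the Large Star round is what alternatingAlgo iterates until the connectivity change counts stop moving.

diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java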
b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 6c4935637a..bd5a04e62f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -41,9 +41,13 @@ import com.google.common.collect.Sets; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.schema.sx.OafUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import scala.Tuple2; @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @@ -97,6 +101,7 @@ public class SparkDedupTest implements Serializable { final SparkConf conf = new SparkConf(); conf.set("spark.sql.shuffle.partitions", "200"); + conf.set("spark.sql.warehouse.dir", testOutputBasePath + "/spark-warehouse"); spark = SparkSession .builder() .appName(SparkDedupTest.class.getSimpleName()) @@ -186,11 +191,11 @@ public class SparkDedupTest implements Serializable { System.out.println("ds_simrel = " + ds_simrel); System.out.println("orp_simrel = " + orp_simrel); - assertEquals(1538, orgs_simrel); - assertEquals(3523, pubs_simrel); - assertEquals(168, sw_simrel); - assertEquals(221, ds_simrel); - assertEquals(3392, orp_simrel); + assertEquals(751, orgs_simrel); + assertEquals(546, pubs_simrel); + assertEquals(113, sw_simrel); + assertEquals(148, ds_simrel); + assertEquals(280, orp_simrel); } @@ -235,10 +240,10 @@ public class SparkDedupTest implements Serializable { .count(); // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(1538, orgs_simrel); - assertEquals(3523, pubs_simrel); - assertEquals(221, ds_simrel); - assertEquals(3392, orp_simrel); + assertEquals(751, orgs_simrel); + assertEquals(546, pubs_simrel); + assertEquals(148, ds_simrel); + assertEquals(280, orp_simrel); // System.out.println("orgs_simrel = " + orgs_simrel); // System.out.println("pubs_simrel = " + pubs_simrel); // System.out.println("ds_simrel = " + ds_simrel); @@ -268,7 +273,7 @@ public class SparkDedupTest implements Serializable { && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])) .count() > 0); - assertEquals(170, sw_simrel.count()); + assertEquals(115, sw_simrel.count()); // System.out.println("sw_simrel = " + sw_simrel.count()); } @@ -292,7 +297,9 @@ public class SparkDedupTest implements Serializable { "-w", testOutputBasePath, "-cc", - "3" + "3", + "-h", + "" }); new SparkCreateMergeRels(parser, spark).run(isLookUpService); @@ -365,6 +372,113 @@ public class SparkDedupTest implements Serializable { .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); } + @Test + @Order(3) + void createMergeRelsWithPivotHistoryTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); + + spark.sql("CREATE DATABASE IF NOT EXISTS pivot_history_test"); + ModelSupport.oafTypes.keySet().forEach(entityType -> { + try { + spark + .read() + .json( + Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/pivot_history").toURI()) + .toFile() 
+ .getAbsolutePath()) + .write() + .mode("overwrite") + .saveAsTable("pivot_history_test." + entityType); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + }); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-h", + "", + "-pivotHistoryDatabase", + "pivot_history_test" + + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + final Dataset pubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + final List merges = pubs + .filter("source == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .collectAsList(); + assertEquals(3, merges.size()); + Set dups = Sets + .newHashSet( + "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", + "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", + "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); + merges.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.MERGES, r.getRelClass()); + assertTrue(dups.contains(r.getTarget())); + }); + + final List mergedIn = pubs + .filter("target == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .collectAsList(); + assertEquals(3, mergedIn.size()); + mergedIn.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); + assertTrue(dups.contains(r.getSource())); + }); + + assertEquals(1268, orgs_mergerel); + assertEquals(1112, pubs.count()); + assertEquals(292, sw_mergerel); + assertEquals(476, ds_mergerel); + assertEquals(742, orp_mergerel); +// System.out.println("orgs_mergerel = " + orgs_mergerel); +// System.out.println("pubs_mergerel = " + pubs_mergerel); +// System.out.println("sw_mergerel = " + sw_mergerel); +// System.out.println("ds_mergerel = " + ds_mergerel); +// System.out.println("orp_mergerel = " + orp_mergerel); + + } + @Test @Order(4) void createMergeRelsTest() throws Exception { @@ -382,7 +496,9 @@ public class SparkDedupTest implements Serializable { "-la", "lookupurl", "-w", - testOutputBasePath + testOutputBasePath, + "-h", + "" }); new SparkCreateMergeRels(parser, spark).run(isLookUpService); @@ -437,10 +553,10 @@ public class SparkDedupTest implements Serializable { }); assertEquals(1268, orgs_mergerel); - assertEquals(1450, pubs.count()); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(738, orp_mergerel); + assertEquals(1112, pubs.count()); + assertEquals(292, sw_mergerel); + assertEquals(476, ds_mergerel); + assertEquals(742, orp_mergerel); // System.out.println("orgs_mergerel = " + orgs_mergerel); // System.out.println("pubs_mergerel = " + pubs_mergerel); // System.out.println("sw_mergerel = " + sw_mergerel); @@ -492,10 +608,10 @@ public class SparkDedupTest 
implements Serializable { .count(); assertEquals(86, orgs_deduprecord); - assertEquals(68, pubs.count()); - assertEquals(49, sw_deduprecord); + assertEquals(91, pubs.count()); + assertEquals(47, sw_deduprecord); assertEquals(97, ds_deduprecord); - assertEquals(92, orp_deduprecord); + assertEquals(93, orp_deduprecord); verifyRoot_1(mapper, pubs); @@ -629,13 +745,13 @@ public class SparkDedupTest implements Serializable { .distinct() .count(); - assertEquals(902, publications); + assertEquals(925, publications); assertEquals(839, organizations); assertEquals(100, projects); assertEquals(100, datasource); - assertEquals(198, softwares); + assertEquals(196, softwares); assertEquals(389, dataset); - assertEquals(520, otherresearchproduct); + assertEquals(521, otherresearchproduct); // System.out.println("publications = " + publications); // System.out.println("organizations = " + organizations); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json index fa889d63b7..ff6670f1e4 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json @@ -101,7 +101,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "authors", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json index b45b6ae832..a4a3761a36 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json @@ -101,7 +101,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "authors", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json index 15ebc7a6a0..c3a769874a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json @@ -29,9 +29,8 @@ }, "pace": { "clustering" : [ - { "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} }, - { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } }, - { "name" : "lowercase", "fields" : [ "doi" ], "params" : { } } + { "name" : "numAuthorsTitleSuffixPrefixChain", "fields" : [ "num_authors", "title" ], "params" : { "mod" : "10" } }, + { "name" : "jsonlistclustering", "fields" : [ "pid" ], "params" : { "jpath_value": "$.value", "jpath_classid": "$.qualifier.classid"} } ], "decisionTree": { "start": { @@ -79,13 +78,37 @@ "ignoreUndefined": "false" }, "layer3": { + "fields": [ + { + "field": "authors", + "comparator": "authorsMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "surname_th": 0.75, + "fullname_th": 0.75, + "threshold": 0.6, + "mode": "full" + } + } + ], + "threshold": 0.6, + 
"aggregation": "MAX", + "positive": "layer4", + "negative": "NO_MATCH", + "undefined": "MATCH", + "ignoreUndefined": "true" + }, + "layer4": { "fields": [ { "field": "title", "comparator": "levensteinTitle", "weight": 1.0, "countIfUndefined": "true", - "params": {} + "params": { + "threshold": "0.99" + } } ], "threshold": 0.99, @@ -97,23 +120,25 @@ } }, "model": [ - { - "name": "doi", - "type": "String", - "path": "$.pid[?(@.qualifier.classid == 'doi')].value" - }, { "name": "pid", "type": "JSON", "path": "$.pid", "overrideMatch": "true" }, + { + "name": "alternateid", + "type": "JSON", + "path": "$.instance[*].alternateIdentifier[*]", + "overrideMatch": "true" + }, { "name": "title", "type": "String", "path": "$.title[?(@.qualifier.classid == 'main title')].value", "length": 250, - "size": 5 + "size": 5, + "clean": "title" }, { "name": "authors", @@ -122,9 +147,9 @@ "size": 200 }, { - "name": "resulttype", + "name": "num_authors", "type": "String", - "path": "$.resulttype.classid" + "path": "$.author.length()" } ], "blacklists": { diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json index f53ff385f9..3c6c8aa5f0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json @@ -75,7 +75,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "url", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json new file mode 100644 index 0000000000..8af1a6d069 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json @@ -0,0 +1 @@ +{"id": "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c", "firstUsage": "2022-01-01", "lastUsage": "2022-01-01", "dedupId": "50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c" } \ No newline at end of file diff --git a/pom.xml b/pom.xml index 3fd351c1db..6ef320253e 100644 --- a/pom.xml +++ b/pom.xml @@ -931,5 +931,25 @@ --> + + + + arm-silicon-mac + + + aarch64 + mac + + + + + + org.xerial.snappy + snappy-java + 1.1.8.4 + + + + \ No newline at end of file From 1287315ffb546397bcbcac588fd5b80a62cab665 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Mon, 11 Dec 2023 21:26:05 +0100 Subject: [PATCH 22/56] Do no longer use dedupId information from pivotHistory Database --- .../dhp/oa/dedup/SparkCreateMergeRels.java | 85 ++++++++++--------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 5bb132b899..46c29494ec 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -1,24 +1,23 @@ package eu.dnetlib.dhp.oa.dedup; -import com.google.common.hash.Hashing; -import com.kwartile.lib.cc.ConnectedComponent; -import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.config.DedupConfig; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; +import static org.apache.spark.sql.functions.*; + +import java.io.IOException; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.catalyst.encoders.RowEncoder; import org.apache.spark.sql.expressions.UserDefinedFunction; import org.apache.spark.sql.expressions.Window; @@ -29,20 +28,23 @@ import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; + +import com.google.common.hash.Hashing; +import com.kwartile.lib.cc.ConnectedComponent; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.pace.config.DedupConfig; import scala.Tuple3; import scala.collection.JavaConversions; -import java.io.IOException; -import java.time.LocalDate; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Optional; - -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; -import static org.apache.spark.sql.functions.*; - public class SparkCreateMergeRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkCreateMergeRels.class); @@ -121,6 +123,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .distinct() .withColumn("vertexId", hashUDF.apply(functions.col("id"))); + // transform simrels into pairs of numeric ids final Dataset edges = spark .read() .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) @@ -128,27 +131,34 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .withColumn("source", hashUDF.apply(functions.col("source"))) .withColumn("target", hashUDF.apply(functions.col("target"))); + // resolve connected components + // ("vertexId", "groupId") Dataset cliques = ConnectedComponent .runOnPairs(edges, 50, spark); + // transform "vertexId" back to its original string value + // groupId is kept numeric as its string value is not used + // ("id", "groupId") Dataset rawMergeRels = cliques .join(vertexIdMap, 
JavaConversions.asScalaBuffer(Collections.singletonList("vertexId")), "inner") .drop("vertexId") .distinct(); + // empty dataframe if historydatabase is not used Dataset pivotHistory = spark .createDataset( Collections.emptyList(), RowEncoder - .apply(StructType.fromDDL("id STRING, firstUsage STRING, lastUsage STRING, dedupId STRING"))); + .apply(StructType.fromDDL("id STRING, lastUsage STRING"))); if (StringUtils.isNotBlank(pivotHistoryDatabase)) { pivotHistory = spark .read() .table(pivotHistoryDatabase + "." + subEntity) - .selectExpr("id", "lastUsage", "dedupId"); + .selectExpr("id", "lastUsage"); } + // depending on resulttype collectefrom and dateofacceptance are evaluated differently String collectedfromExpr = "false AS collectedfrom"; String dateExpr = "'' AS date"; @@ -164,8 +174,10 @@ public class SparkCreateMergeRels extends AbstractSparkAction { dateExpr = "dateofacceptance.value AS date"; } + // cap pidType at w3id as from there on they are considered equal UserDefinedFunction mapPid = udf( (String s) -> Math.min(PidType.tryValueOf(s).ordinal(), PidType.w3id.ordinal()), DataTypes.IntegerType); + UserDefinedFunction validDate = udf((String date) -> { if (StringUtils.isNotBlank(date) && date.matches(DatePicker.DATE_PATTERN) && DatePicker.inRange(date)) { @@ -186,8 +198,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .withColumn("pidType", mapPid.apply(col("pidType"))) // ordinal of pid type .withColumn("date", validDate.apply(col("date"))); - UserDefinedFunction generateDedupId = udf((String s) -> IdGenerator.generate(s), DataTypes.StringType); - // ordering to selected pivot id WindowSpec w = Window .partitionBy("groupId") @@ -202,17 +212,15 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .join(pivotHistory, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "full") .join(pivotingData, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") .withColumn("pivot", functions.first("id").over(w)) - .withColumn("pivotDedupId", functions.first("dedupId").over(w)) .withColumn("position", functions.row_number().over(w)) - .filter(cut > 0 ? col("position").lt(lit(cut)) : lit(true)) - // .select("id", "groupId", "collectedfrom", "pivot", "dedupId", "pivotDedupId") - // .distinct() + .filter(cut > 0 ? 
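/*
 * Note: the pivot election above is a standard Spark window pattern: rows are
 * partitioned by "groupId", ordered by the pivot-selection criteria (pid type,
 * validated date, pivot history), then
 *
 *   functions.first("id").over(w)     // the pivot elected for the whole group
 *   functions.row_number().over(w)    // this row's rank, later compared to cut
 *
 * so every row carries both the group-wide pivot and its own position without
 * a second aggregation pass.
 */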
col("position").lt(lit(cut)) : lit(true)) // apply cut after choosing pivot .flatMap( (FlatMapFunction>) (Row r) -> { String id = r.getAs("id"); + String dedupId = IdGenerator.generate(id); + String pivot = r.getAs("pivot"); - String pivotDedupId = r.getAs("pivotDedupId"); // dedupId associated with the pivot - String dedupId = r.getAs("dedupId"); // dedupId associated with this id if it was a pivot + String pivotDedupId = IdGenerator.generate(pivot); // filter out id == pivotDedupId // those are caused by claim expressed on pivotDedupId @@ -233,14 +241,9 @@ public class SparkCreateMergeRels extends AbstractSparkAction { return res.iterator(); } - // new pivot, assign pivotDedupId with current IdGenerator - if (StringUtils.isBlank(pivotDedupId)) { - pivotDedupId = IdGenerator.generate(pivot); - } - - // this was a pivot in a preceding graph but it has been merged into a new group with different + // this was a pivot in a previous graph but it has been merged into a new group with different // pivot - if (StringUtils.isNotBlank(dedupId) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { + if (!r.isNullAt(r.fieldIndex("lastUsage")) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { // materialize the previous dedup record as a merge relation with the new one res.add(new Tuple3<>(dedupId, pivotDedupId, null)); } From 831cc1fddececffc80701931ad9dab4d9926192b Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 14 Dec 2023 11:51:02 +0100 Subject: [PATCH 23/56] Generate "merged" dedup id relations also for records that are filtered out by the cut parameters --- .../java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 46c29494ec..191870d3b0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -213,7 +213,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .join(pivotingData, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") .withColumn("pivot", functions.first("id").over(w)) .withColumn("position", functions.row_number().over(w)) - .filter(cut > 0 ? 
col("position").lt(lit(cut)) : lit(true)) // apply cut after choosing pivot .flatMap( (FlatMapFunction>) (Row r) -> { String id = r.getAs("id"); @@ -249,7 +248,9 @@ public class SparkCreateMergeRels extends AbstractSparkAction { } // add merge relations - res.add(new Tuple3<>(id, pivotDedupId, pivot)); + if (cut <=0 || r.getAs("position") <= cut) { + res.add(new Tuple3<>(id, pivotDedupId, pivot)); + } return res.iterator(); }, Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.STRING())) From 10e135db1eb26cf6383d02f2318c8e6701631553 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 22 Dec 2023 09:55:10 +0100 Subject: [PATCH 24/56] Use dedup_wf_002 in place of dedup_wf_001 to make explicit a different algorithm has been used to generate those kind of ids --- .../src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index 37e1bfd155..1d3d4afdd8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -40,7 +40,7 @@ public class IdGenerator implements Serializable { if (PidType.isValid(pidType)) { return prefix + "|" + dedupify(ns) + "::" + suffix; } else { - return prefix + "|dedup_wf_001::" + md5(originalId); // hash the whole originalId to avoid collisions + return prefix + "|dedup_wf_002::" + md5(originalId); // hash the whole originalId to avoid collisions } } From 3c66e3bd7bd7fbe14f068b5176ae3681e941fda9 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Fri, 22 Dec 2023 09:57:30 +0100 Subject: [PATCH 25/56] Create dedup record for "merged" pivots Do not create dedup records for group that have more than 20 different acceptance date --- .../dhp/oa/dedup/DedupRecordFactory.java | 255 +++++++++++------- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 4 +- 2 files changed, 158 insertions(+), 101 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index d9fb24078e..4c12d1dc65 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,130 +1,187 @@ package eu.dnetlib.dhp.oa.dedup; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.ReduceFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; +import 
org.apache.commons.beanutils.BeanUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.ReduceFunction; +import org.apache.spark.sql.*; import scala.Tuple2; +import scala.Tuple3; +import scala.collection.JavaConversions; + +import java.util.*; +import java.util.stream.Stream; public class DedupRecordFactory { + public static final class DedupRecordReduceState { + public final String dedupId; - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + public final ArrayList aliases = new ArrayList<>(); - private DedupRecordFactory() { - } + public final HashSet acceptanceDate = new HashSet<>(); - public static Dataset createDedupRecord( - final SparkSession spark, - final DataInfo dataInfo, - final String mergeRelsInputPath, - final String entitiesInputPath, - final Class clazz) { + public OafEntity entity; - long ts = System.currentTimeMillis(); + public DedupRecordReduceState(String dedupId, String id, OafEntity entity) { + this.dedupId = dedupId; + this.entity = entity; + if (entity == null) { + aliases.add(id); + } else { + if (Result.class.isAssignableFrom(entity.getClass())) { + Result result = (Result) entity; + if (result.getDateofacceptance() != null && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { + acceptanceDate.add(result.getDateofacceptance().getValue()); + } + } + } + } - // - Dataset entities = spark - .read() - .schema(Encoders.bean(clazz).schema()) - .json(entitiesInputPath) - .as(Encoders.bean(clazz)) - .map( - (MapFunction>) entity -> { - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - .selectExpr("_1 AS id", "_2 AS kryoObject"); + public String getDedupId() { + return dedupId; + } + } + private static final int MAX_ACCEPTANCE_DATE = 20; - // : source is the dedup_id, target is the id of the mergedIn - Dataset mergeRels = spark - .read() - .load(mergeRelsInputPath) - .where("relClass == 'merges'") - .selectExpr("source as dedupId", "target as id"); + private DedupRecordFactory() { + } - return mergeRels - .join(entities, "id") - .select("dedupId", "kryoObject") - .as(Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - .groupByKey((MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .reduceGroups( - (ReduceFunction>) (t1, t2) -> new Tuple2<>(t1._1(), - reduceEntity(t1._1(), t1._2(), t2._2(), clazz))) - .map( - (MapFunction>, T>) t -> { - T res = t._2()._2(); - res.setDataInfo(dataInfo); - res.setLastupdatetimestamp(ts); - return res; - }, - Encoders.bean(clazz)); - } + public static Dataset createDedupRecord( + final SparkSession spark, + final DataInfo dataInfo, + final String mergeRelsInputPath, + final String entitiesInputPath, + final Class clazz) { - public static T reduceEntity( - String id, T entity, T duplicate, Class clazz) { + final long ts = System.currentTimeMillis(); + final Encoder beanEncoder = Encoders.bean(clazz); + final Encoder kryoEncoder = Encoders.kryo(clazz); - int compare = new IdentifierComparator() - .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); + // + Dataset entities = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(entitiesInputPath) + .as(beanEncoder) + .map( + (MapFunction>) entity -> { + return new Tuple2<>(entity.getId(), entity); + }, + 
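/*
 * Note: the mergeRels/entities join below is a *left* join, so a
 * dedupId -> id pair whose entity payload is absent still reaches the reducer
 * (entity == null) and is collected as an alias instead of being dropped;
 * this is what keeps the ids of "merged" pivots resolvable.
 */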
Encoders.tuple(Encoders.STRING(), kryoEncoder)) + .selectExpr("_1 AS id", "_2 AS kryoObject"); - if (compare > 0) { - T swap = duplicate; - duplicate = entity; - entity = swap; + // : source is the dedup_id, target is the id of the mergedIn + Dataset mergeRels = spark + .read() + .load(mergeRelsInputPath) + .where("relClass == 'merges'") + .selectExpr("source as dedupId", "target as id"); + + return mergeRels + .join(entities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .select("dedupId", "id", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) + .map((MapFunction, DedupRecordReduceState>) t -> new DedupRecordReduceState(t._1(), t._2(), t._3()), Encoders.kryo(DedupRecordReduceState.class)) + .groupByKey((MapFunction) DedupRecordReduceState::getDedupId, Encoders.STRING()) + .reduceGroups( + (ReduceFunction) (t1, t2) -> { + if (t1.entity == null) { + t2.aliases.addAll(t1.aliases); + return t2; + } + if (t1.acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { + t1.acceptanceDate.addAll(t2.acceptanceDate); + } + t1.aliases.addAll(t2.aliases); + t1.entity = reduceEntity(t1.entity, t2.entity); + + return t1; + } + ) + .flatMap + ((FlatMapFunction, OafEntity>) t -> { + String dedupId = t._1(); + DedupRecordReduceState agg = t._2(); + + if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) { + return Collections.emptyIterator(); + } + + return Stream.concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) + .map(id -> { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity); + res.setId(id); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + }).iterator(); + }, beanEncoder); + } + + private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { + + if (duplicate == null) { + return entity; } - entity.mergeFrom(duplicate); - entity.setId(id); - if (ModelSupport.isSubClass(duplicate, Result.class)) { - Result re = (Result) entity; - Result rd = (Result) duplicate; + int compare = new IdentifierComparator<>() + .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); - List> authors = new ArrayList<>(); - if (re.getAuthor() != null) { - authors.add(re.getAuthor()); - } - if (rd.getAuthor() != null) { - authors.add(rd.getAuthor()); - } + if (compare > 0) { + OafEntity swap = duplicate; + duplicate = entity; + entity = swap; + } - re.setAuthor(AuthorMerger.merge(authors)); - } + entity.mergeFrom(duplicate); - return entity; - } + if (ModelSupport.isSubClass(duplicate, Result.class)) { + Result re = (Result) entity; + Result rd = (Result) duplicate; - public static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) - throws IllegalAccessException, InstantiationException, InvocationTargetException { - T base = entities.next()._2(); + List> authors = new ArrayList<>(); + if (re.getAuthor() != null) { + authors.add(re.getAuthor()); + } + if (rd.getAuthor() != null) { + authors.add(rd.getAuthor()); + } - while (entities.hasNext()) { - T duplicate = entities.next()._2(); - if (duplicate != null) - base = reduceEntity(id, base, duplicate, clazz); - } + re.setAuthor(AuthorMerger.merge(authors)); + } - base.setDataInfo(dataInfo); - base.setLastupdatetimestamp(ts); + return entity; + } - return base; - } + public static T entityMerger( + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) { + T base = entities.next()._2(); + + while (entities.hasNext()) 
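/*
 * Note on the rewrite above: each group folds into a DedupRecordReduceState
 * accumulating (a) the surviving merged entity, (b) the ids of absorbed
 * records ("aliases"), and (c) up to MAX_ACCEPTANCE_DATE distinct acceptance
 * dates. The flatMap then clones the merged entity once per alias via
 * BeanUtils.cloneBean, and
 *
 *   if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) // 20
 *       return Collections.emptyIterator();
 *
 * i.e. a group spanning 20 or more distinct acceptance dates is treated as a
 * likely over-merge and no dedup record is materialized for it.
 */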
{ + T duplicate = entities.next()._2(); + if (duplicate != null) + base = (T) reduceEntity(base, duplicate); + } + + base.setId(id); + base.setDataInfo(dataInfo); + base.setLastupdatetimestamp(ts); + + return base; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index bd5a04e62f..8b3480e60b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -611,7 +611,7 @@ public class SparkDedupTest implements Serializable { assertEquals(91, pubs.count()); assertEquals(47, sw_deduprecord); assertEquals(97, ds_deduprecord); - assertEquals(93, orp_deduprecord); + assertEquals(92, orp_deduprecord); verifyRoot_1(mapper, pubs); @@ -751,7 +751,7 @@ public class SparkDedupTest implements Serializable { assertEquals(100, datasource); assertEquals(196, softwares); assertEquals(389, dataset); - assertEquals(521, otherresearchproduct); + assertEquals(520, otherresearchproduct); // System.out.println("publications = " + publications); // System.out.println("organizations = " + organizations); From 2753044d13da0465b8b9061e70252ed6ac69a325 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 11 Jan 2024 16:28:26 +0100 Subject: [PATCH 26/56] refined mapping for the extraction of the original resource type --- .../dhp/oa/graph/raw/OafToOafMapper.java | 30 +++++++++++++++++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 19 ++++++++---- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index a63296d189..eee518353e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Element; @@ -27,6 +28,15 @@ import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; public class OafToOafMapper extends AbstractMdRecordToOafMapper { + private static Set DC_TYPE_PUBLICATION_VERSION = new HashSet<>(); + + static { + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/submittedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/acceptedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/publishedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/updatedVersion"); + } + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, final boolean forceOrginalId) { super(vocs, invisible, shouldHashId, forceOrginalId); @@ -192,24 +202,40 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { /** * The Dublin Core element dc:type can be repeated, but we need to base our mapping on a single value * So this method tries to give precedence to the COAR resource type, when available. 
Otherwise, it looks for the - * openaire's info:eu-repo type, and as last resort picks the 1st type text available + * openaire's info:eu-repo type, but excluding the following + * + * info:eu-repo/semantics/draft + * info:eu-repo/semantics/submittedVersion + * info:eu-repo/semantics/acceptedVersion + * info:eu-repo/semantics/publishedVersion + * info:eu-repo/semantics/updatedVersion + * + * Then, it picks the 1st dc:type text available and, in case there is no dc:type element, as last resort it tries + * to extract the type from the dr:CobjCategory element + * + * Examples: * * http://purl.org/coar/resource_type/c_5794 * info:eu-repo/semantics/article * Conference article + * 0006 * * @param doc the input document * @return the chosen resource type */ @Override protected String findOriginalType(Document doc) { - return (String) doc + final String dcType = (String) doc .selectNodes("//dc:type") .stream() .map(o -> "" + ((Node) o).getText().trim()) + .filter(t -> !DC_TYPE_PUBLICATION_VERSION.contains(t)) .sorted(new OriginalTypeComparator()) .findFirst() .orElse(null); + + final String drCobjCategory = doc.valueOf("//dr:CobjCategory/text()"); + return ObjectUtils.firstNonNull(dcType, drCobjCategory); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index e63b01a002..08529125c8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -221,27 +221,36 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } /** - * The Datacite element + * Extracts the resource type from The Datacite element * - * journal article + * journal article * * @param doc the input document * @return the chosen resource type */ @Override protected String findOriginalType(Document doc) { - return Optional + final String resourceType = Optional .ofNullable( (Element) doc .selectSingleNode( "//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']")) .map(element -> { - final String resourceTypeURI = element.attributeValue("anyURI"); + final String resourceTypeURI = element.attributeValue("uri"); + final String resourceTypeAnyURI = element.attributeValue("anyURI"); final String resourceTypeTxt = element.getText(); + final String resourceTypeGeneral = element.attributeValue("resourceTypeGeneral"); - return ObjectUtils.firstNonNull(resourceTypeURI, resourceTypeTxt); + return ObjectUtils + .firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral); }) .orElse(null); + + final String drCobjCategory = doc.valueOf("//dr:CobjCategory/text()"); + return ObjectUtils.firstNonNull(resourceType, drCobjCategory); } @Override From f61212593975db113209f23ca17deb69075a7446 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Jan 2024 10:20:28 +0100 Subject: [PATCH 27/56] fix issue on FoS integration. 
Removing the null values from FoS --- .../PrepareFOSSparkJob.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 57ad8b96ae..b1ffe7f371 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -124,8 +124,19 @@ public class PrepareFOSSparkJob implements Serializable { FOSDataModel first) { level1.add(first.getLevel1()); level2.add(first.getLevel2()); - level3.add(first.getLevel3() + "@@" + first.getScoreL3()); - level4.add(first.getLevel4() + "@@" + first.getScoreL4()); + if (Optional.ofNullable(first.getLevel3()).isPresent() && + !first.getLevel3().equalsIgnoreCase(NA) && !first.getLevel3().equalsIgnoreCase(NULL) + && first.getLevel3() != null) + level3.add(first.getLevel3() + "@@" + first.getScoreL3()); + else + level3.add(NULL); + if (Optional.ofNullable(first.getLevel4()).isPresent() && + !first.getLevel4().equalsIgnoreCase(NA) && + !first.getLevel4().equalsIgnoreCase(NULL) && + first.getLevel4() != null) + level4.add(first.getLevel4() + "@@" + first.getScoreL4()); + else + level4.add(NULL); } } From 21a14fcd800944d2a7fca1c70ad77726536f2b97 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Mon, 15 Jan 2024 00:08:07 +0100 Subject: [PATCH 28/56] Reusable RunSQLSparkJob for executing SQL in Spark through Oozie Spark Actions Implements pivots table update oozie workflow --- .../eu/dnetlib/dhp/oozie/RunSQLSparkJob.java | 75 +++++++++++++++ .../dnetlib/dhp/oozie/run_sql_parameters.json | 20 ++++ .../pivothistory/oozie_app/config-default.xml | 26 +++++ .../oa/dedup/pivothistory/oozie_app/sql.sql | 62 ++++++++++++ .../dedup/pivothistory/oozie_app/workflow.xml | 95 +++++++++++++++++++ 5 files changed, 278 insertions(+) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java create mode 100644 dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java new file mode 100644 index 0000000000..ef296bfc90 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java @@ -0,0 +1,75 @@ + +package eu.dnetlib.dhp.oozie; + +import com.google.common.io.Resources; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.commons.text.StringSubstitutor; +import org.apache.spark.SparkConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +import static 
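// Note on RunSQLSparkJob below: the script is split on a literal
// "semicolon + EOS block comment" delimiter, so every statement in sql.sql
// must end with "; /*EOS*/". For example:
//
//   String sql = "DROP TABLE a; /*EOS*/ CREATE TABLE b (id STRING); /*EOS*/";
//   String[] st = sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*");
//   // st = { "DROP TABLE a", "CREATE TABLE b (id STRING)" }
//
// and each resulting chunk is passed to spark.sql(...) and timed individually.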
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +public class RunSQLSparkJob { + private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class); + + private final ArgumentApplicationParser parser; + + public RunSQLSparkJob(ArgumentApplicationParser parser) { + this.parser = parser; + } + + public static void main(String[] args) throws Exception { + + Map params = new HashMap<>(); + for (int i = 0; i < args.length - 1; i++) { + if (args[i].startsWith("--")) { + params.put(args[i].substring(2), args[++i]); + } + } + + /* + * String jsonConfiguration = IOUtils .toString( Objects .requireNonNull( RunSQLSparkJob.class + * .getResourceAsStream( "/eu/dnetlib/dhp/oozie/run_sql_parameters.json"))); final ArgumentApplicationParser + * parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); + */ + + Boolean isSparkSessionManaged = Optional + .ofNullable(params.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + URL url = com.google.common.io.Resources.getResource(params.get("sql")); + String raw_sql = Resources.toString(url, StandardCharsets.UTF_8); + + String sql = StringSubstitutor.replace(raw_sql, params); + log.info("sql: {}", sql); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", params.get("hiveMetastoreUris")); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> { + for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) { + log.info("executing: {}", statement); + long startTime = System.currentTimeMillis(); + spark.sql(statement).show(); + log + .info( + "executed in {}", + DurationFormatUtils.formatDuration(System.currentTimeMillis() - startTime, "HH:mm:ss.S")); + } + }); + } + +} diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json new file mode 100644 index 0000000000..355f38e2fc --- /dev/null +++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "hmu", + "paramLongName": "hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "sql", + "paramLongName": "sql", + "paramDescription": "sql script to execute", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml new file mode 100644 index 0000000000..17bb706477 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml @@ -0,0 +1,26 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + sparkSqlWarehouseDir + /user/hive/warehouse + + \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql 
b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql new file mode 100644 index 0000000000..86dbda1c97 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql @@ -0,0 +1,62 @@ + +CREATE TABLE `${pivot_history_db}`.`dataset_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`dataset` ON relation.source = dataset.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`dataset` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`publication_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`publication` ON relation.source = publication.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`publication` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`software_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`software` ON relation.source = software.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`software` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`otherresearchproduct_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`otherresearchproduct` ON relation.source = otherresearchproduct.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`otherresearchproduct` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ + + +DROP TABLE IF EXISTS `${pivot_history_db}`.`dataset_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`dataset` RENAME TO `${pivot_history_db}`.`dataset_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`dataset_new` RENAME TO `${pivot_history_db}`.`dataset`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`publication_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`publication` RENAME TO `${pivot_history_db}`.`publication_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`publication_new` RENAME TO `${pivot_history_db}`.`publication`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`software_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`software` RENAME TO `${pivot_history_db}`.`software_old`; /*EOS*/ +ALTER TABLE 
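-- Note: this script follows the usual Hive shadow-table swap: build <table>_new,
-- drop any stale <table>_old, rename the live table to _old and _new to live.
-- A failed build therefore never leaves the pivot-history table half-written,
-- and the _old copy doubles as a one-step rollback.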
`${pivot_history_db}`.`software_new` RENAME TO `${pivot_history_db}`.`software`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`otherresearchproduct_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`otherresearchproduct` RENAME TO `${pivot_history_db}`.`otherresearchproduct_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`otherresearchproduct_new` RENAME TO `${pivot_history_db}`.`otherresearchproduct`; /*EOS*/ diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml new file mode 100644 index 0000000000..d562f088e9 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml @@ -0,0 +1,95 @@ + + + + + pivot_history_db + + Pivot history DB on hive + + + new_graph_db + + New graph DB on hive + + + new_graph_date + + Creation date of new graph db + + + + + hiveMetastoreUris + hive server metastore URIs + + + sparkSqlWarehouseDir + + + + sparkClusterOpts + --conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory + spark cluster-wide options + + + sparkResourceOpts + --executor-memory=3G --conf spark.executor.memoryOverhead=3G --executor-cores=6 --driver-memory=8G --driver-cores=4 + spark resource options + + + sparkApplicationOpts + --conf spark.sql.shuffle.partitions=3840 + spark resource options + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Upgrade Pivot History + eu.dnetlib.dhp.oozie.RunSQLSparkJob + dhp-dedup-openaire-${projectVersion}.jar + + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + ${sparkClusterOpts} + ${sparkResourceOpts} + ${sparkApplicationOpts} + + --hiveMetastoreUris${hiveMetastoreUris} + --sqleu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql + --pivot_history_db${pivot_history_db} + --new_graph_db${new_graph_db} + --new_graph_date${new_graph_date} + + + + + + + \ No newline at end of file From 59eaccbd87197095c50c902458bf84777932e51f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 15 Jan 2024 17:49:54 +0100 Subject: [PATCH 29/56] [enrichment single step] refactoring to fix issue in disappeared result type --- .../main/java/eu/dnetlib/dhp/MoveResult.java | 84 +++++++++++++++++++ ...kResultToCommunityFromOrganizationJob.java | 66 +++++++++------ .../SparkResultToCommunityFromProject.java | 41 ++++----- .../eu/dnetlib/dhp/wf/main/job.properties | 6 +- .../dhp/wf/main/oozie_app/workflow.xml | 2 +- .../input_moveresult_parameters.json | 22 +++++ .../oozie_app/workflow.xml | 29 ++++++- .../oozie_app/workflow.xml | 29 ++++++- 8 files changed, 225 insertions(+), 54 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java new file mode 100644 index 0000000000..5ffcf8d3f8 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp; + +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; +import static eu.dnetlib.dhp.PropagationConstant.readPath; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Result; + +/** + * @author miriam.baglioni + * @Date 15/01/24 + */ +public class MoveResult implements Serializable { + private static final Logger log = LoggerFactory.getLogger(MoveResult.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkResultToCommunityFromOrganizationJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + moveResults(spark, inputPath, outputPath); + + }); + } + + public static void moveResults(SparkSession spark, String inputPath, String outputPath) { + + ModelSupport.entityTypes + .keySet() + .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + // .parallelStream() + .forEach(e -> { + Class resultClazz = ModelSupport.entityTypes.get(e); + Dataset resultDataset = readPath(spark, inputPath + e.name(), resultClazz); + if (resultDataset.count() > 0) { + + resultDataset + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + } + + }); + + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index cc87b80e5e..4f755266a1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -76,29 +76,41 @@ public class SparkResultToCommunityFromOrganizationJob { ModelSupport.entityTypes .keySet() .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + // .parallelStream() .forEach(e -> { - if (ModelSupport.isResult(e)) { - Class resultClazz = ModelSupport.entityTypes.get(e); - 
removeOutputDir(spark, outputPath + e.name()); - Dataset result = readPath(spark, inputPath + e.name(), resultClazz); + // if () { + Class resultClazz = ModelSupport.entityTypes.get(e); + removeOutputDir(spark, outputPath + e.name()); + Dataset result = readPath(spark, inputPath + e.name(), resultClazz); - result - .joinWith( - possibleUpdates, - result.col("id").equalTo(possibleUpdates.col("resultId")), - "left_outer") - .map(resultCommunityFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + e.name()); + log.info("executing left join"); + result + .joinWith( + possibleUpdates, + result.col("id").equalTo(possibleUpdates.col("resultId")), + "left_outer") + .map(resultCommunityFn(), Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); - readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath + e.name()); - } +// log +// .info( +// "reading results from " + outputPath + e.name() + " and copying them to " + inputPath +// + e.name()); +// Dataset tmp = readPath(spark, outputPath + e.name(), resultClazz); +// if (tmp.count() > 0){ +// +// tmp +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(inputPath + e.name()); +// } + + // } }); } @@ -115,11 +127,11 @@ public class SparkResultToCommunityFromOrganizationJob { .map(Context::getId) .collect(Collectors.toList()); - @SuppressWarnings("unchecked") - R res = (R) ret.getClass().newInstance(); + // @SuppressWarnings("unchecked") + // R res = (R) ret.getClass().newInstance(); - res.setId(ret.getId()); - List propagatedContexts = new ArrayList<>(); + // res.setId(ret.getId()); + // List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { if (!contextList.contains(cId)) { Context newContext = new Context(); @@ -133,11 +145,11 @@ public class SparkResultToCommunityFromOrganizationJob { PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS))); - propagatedContexts.add(newContext); + ret.getContext().add(newContext); } } - res.setContext(propagatedContexts); - ret.mergeFrom(res); + // res.setContext(propagatedContexts); + // ret.mergeFrom(res); } return ret; }; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index dde5340617..bb712d8786 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -86,29 +86,30 @@ public class SparkResultToCommunityFromProject implements Serializable { ModelSupport.entityTypes .keySet() .parallelStream() + .filter(e -> ModelSupport.isResult(e)) .forEach(e -> { - if (ModelSupport.isResult(e)) { - removeOutputDir(spark, outputPath + e.name()); - Class resultClazz = ModelSupport.entityTypes.get(e); - Dataset result = readPath(spark, inputPath + e.name(), resultClazz); + // if () { + removeOutputDir(spark, outputPath + e.name()); + Class resultClazz = ModelSupport.entityTypes.get(e); + Dataset result = readPath(spark, 
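/*
 * Note: MoveResult (added above) copies each enriched result type back over
 * the source graph, guarded by
 *
 *   if (resultDataset.count() > 0) { ... SaveMode.Overwrite ... }
 *
 * since an Overwrite with an empty dataset would truncate the corresponding
 * source store, which appears to be the "disappeared result type" issue this
 * patch series is fixing.
 */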
inputPath + e.name(), resultClazz); - result - .joinWith( - possibleUpdates, - result.col("id").equalTo(possibleUpdates.col("resultId")), - "left_outer") - .map(resultCommunityFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + e.name()); + result + .joinWith( + possibleUpdates, + result.col("id").equalTo(possibleUpdates.col("resultId")), + "left_outer") + .map(resultCommunityFn(), Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); - readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath + e.name()); - } + readPath(spark, outputPath + e.name(), resultClazz) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); + // } }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 4cb759343c..a84e8ab6b4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,12 +1,12 @@ -sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=CountryPropagation +sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched +resumeFrom=CommunityOrganization allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48 #allowedtypes=pubsrepository::institutional allowedtypes=Institutional -outputPath=/tmp/miriam/enrichment_one_step +outputPath=/tmp/beta_provision/graph/11_graph_orcid pathMap ={"author":"$['author'][*]['fullname']", \ "title":"$['title'][*]['value']",\ "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 8e91707b6e..9b7fad3255 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -231,7 +231,7 @@ - + diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json new file mode 100644 index 0000000000..4645be435e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json @@ -0,0 +1,22 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index 6aeffb4574..18c5f4f0f6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -69,7 +69,7 @@ yarn cluster - community2resultfromorganization-Publication + community2resultfromorganization eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob dhp-enrichment-${projectVersion}.jar @@ -88,6 +88,33 @@ --sourcePath${sourcePath}/ --outputPath${workingDir}/communityorganization/resulttocommunityfromorganization/ + + + + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communityorganization/resulttocommunityfromorganization/ + --outputPath${sourcePath}/ + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml index dd845064b2..01e366c02f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -86,12 +86,37 @@ --sourcePath${sourcePath}/ --outputPath${workingDir}/communitythroughproject/ + + + + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 
+ --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitythroughproject/ + --outputPath${sourcePath}/ + + - - \ No newline at end of file From 67ce2d54be4019d3d1aa157cbc1d50eb03f1ea59 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 17 Jan 2024 16:50:00 +0100 Subject: [PATCH 30/56] [enrichment single step] refactoring to fix issues in disappeared result type --- .../SparkCountryPropagationJob.java | 6 -- .../SparkResultToCommunityFromProject.java | 10 +- ...parkResultToCommunityThroughSemRelJob.java | 21 +---- .../eu/dnetlib/dhp/wf/main/job.properties | 4 +- .../dhp/wf/main/oozie_app/workflow.xml | 2 +- .../bulktag/oozie_app/config-default.xml | 12 ++- .../bulktag/oozie_app/workflow.xml | 18 +++- .../oozie_app/config-default.xml | 4 +- .../countrypropagation/oozie_app/workflow.xml | 92 ++++++++++++++----- .../oozie_app/workflow.xml | 15 ++- .../oozie_app/workflow.xml | 20 ++-- .../projecttoresult/oozie_app/workflow.xml | 15 ++- .../input_moveresult_parameters.json | 0 .../oozie_app/workflow.xml | 14 ++- .../oozie_app/workflow.xml | 16 +++- .../oozie_app/workflow.xml | 49 +++++++++- .../oozie_app/workflow.xml | 14 ++- 17 files changed, 229 insertions(+), 83 deletions(-) rename dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/{ => resulttocommunityfromorganization}/input_moveresult_parameters.json (100%) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 92930c18bd..a0cc4c84a6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -97,12 +97,6 @@ public class SparkCountryPropagationJob { .mode(SaveMode.Overwrite) .json(outputPath); - readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(sourcePath); - } private static MapFunction, R> getCountryMergeFn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index bb712d8786..f9c36d7ca3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -104,11 +104,11 @@ public class SparkResultToCommunityFromProject implements Serializable { .option("compression", "gzip") .json(outputPath + e.name()); - readPath(spark, outputPath + e.name(), resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath + e.name()); +// readPath(spark, outputPath + e.name(), 
resultClazz) +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(inputPath + e.name()); // } }); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 4929c7582d..3cf2f73c3e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -101,11 +101,6 @@ public class SparkResultToCommunityThroughSemRelJob { .option("compression", "gzip") .json(outputPath); - readPath(spark, outputPath, resultClazz) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath); } private static MapFunction, R> contextUpdaterFn() { @@ -115,11 +110,11 @@ public class SparkResultToCommunityThroughSemRelJob { if (rcl.isPresent()) { Set contexts = new HashSet<>(); ret.getContext().forEach(c -> contexts.add(c.getId())); - List contextList = rcl + rcl .get() .getCommunityList() .stream() - .map( + .forEach( c -> { if (!contexts.contains(c)) { Context newContext = new Context(); @@ -133,19 +128,11 @@ public class SparkResultToCommunityThroughSemRelJob { PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS))); - return newContext; + ret.getContext().add(newContext); } - return null; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - @SuppressWarnings("unchecked") - R r = (R) ret.getClass().newInstance(); + }); - r.setId(ret.getId()); - r.setContext(contextList); - ret.mergeFrom(r); } return ret; diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index a84e8ab6b4..7e82d9b2c4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,12 +1,12 @@ sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched -resumeFrom=CommunityOrganization +resumeFrom=CommunitySemanticRelation allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo 
datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48 #allowedtypes=pubsrepository::institutional allowedtypes=Institutional -outputPath=/tmp/beta_provision/graph/11_graph_orcid +outputPath=/tmp/miriam/graph/11_graph_orcid pathMap ={"author":"$['author'][*]['fullname']", \ "title":"$['title'][*]['value']",\ "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 9b7fad3255..8e91707b6e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -231,7 +231,7 @@ - + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml index fe82ae1940..2695253e62 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml @@ -45,10 +45,18 @@ sparkExecutorMemory - 6G + 5G sparkExecutorCores - 1 + 4 + + + memoryOverhead + 3G + + + partitions + 3284 \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index 6c51634484..c7a9e8a263 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -12,6 +12,10 @@ baseURL The URL to access the community APIs + + startFrom> + undelete + @@ -26,12 +30,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('startFrom') eq 'undelete'} + + + + + @@ -45,7 +57,7 @@ yarn-cluster cluster - bulkTagging-publication + bulkTagging eu.dnetlib.dhp.bulktag.SparkBulkTagJob dhp-enrichment-${projectVersion}.jar @@ -53,6 +65,8 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} + --conf 
spark.executor.memoryOverhead=${memoryOverhead} + --conf spark.sql.shuffle.partitions=${partitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml index 2744ea92ba..1cb0b8a5ef 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml @@ -45,11 +45,11 @@ sparkExecutorMemory - 6G + 5G sparkExecutorCores - 1 + 4 spark2MaxExecutors diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 81d6dc3dc1..3a6e3edfb6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -12,6 +12,10 @@ allowedtypes the allowed types + + startFrom + undelete + @@ -25,7 +29,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -61,7 +73,7 @@ --sourcePath${sourcePath} --whitelist${whitelist} --allowedtypes${allowedtypes} - --outputPath${workingDir}/preparedInfo + --outputPath${workingDir}/country/preparedInfo @@ -95,10 +107,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --outputPath${workingDir}/publication - --workingPath${workingDir}/workingP + --outputPath${workingDir}/country/publication + --workingPath${workingDir}/country/workingP --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo @@ -125,10 +137,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --outputPath${workingDir}/dataset - --workingPath${workingDir}/workingD + --outputPath${workingDir}/country/dataset + --workingPath${workingDir}/country/workingD --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo @@ -155,10 +167,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --outputPath${workingDir}/otherresearchproduct - --workingPath${workingDir}/workingO + --outputPath${workingDir}/country/otherresearchproduct + --workingPath${workingDir}/country/workingO --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo @@ -185,10 +197,10 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --outputPath${workingDir}/software - --workingPath${workingDir}/workingS + --outputPath${workingDir}/country/software + --workingPath${workingDir}/country/workingS --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - 
--preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo @@ -224,9 +236,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - --preparedInfoPath${workingDir}/publication + --preparedInfoPath${workingDir}/country/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/country/publication + --outputPath${workingDir}/country/country/publication @@ -253,9 +265,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --preparedInfoPath${workingDir}/dataset + --preparedInfoPath${workingDir}/country/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/country/dataset + --outputPath${workingDir}/country/country/dataset @@ -282,9 +294,9 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --preparedInfoPath${workingDir}/otherresearchproduct + --preparedInfoPath${workingDir}/country/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/country/otherresearchproduct + --outputPath${workingDir}/country/country/otherresearchproduct @@ -311,15 +323,49 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --preparedInfoPath${workingDir}/software + --preparedInfoPath${workingDir}/country/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/country/software + --outputPath${workingDir}/country/country/software - + + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/country/country/ + --outputPath${sourcePath}/ + + + + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml index 05824d209b..ecec3579b9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -4,7 +4,10 @@ sourcePath the source path - + + startFrom + undelete + @@ -18,7 +21,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 483a805b10..bab1e55dfa 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -114,7 +114,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -142,7 +142,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -170,7 +170,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -198,7 +198,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -225,8 +225,8 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath${workingDir}/orcidprop - --outputPath${workingDir}/orcidprop/mergedOrcidAssoc + --sourcePath${workingDir}/orcid/orcidprop + --outputPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc @@ -261,7 +261,7 @@ --conf spark.hadoop.mapreduce.reduce.speculative=false --conf spark.sql.shuffle.partitions=3840 - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication @@ -291,7 +291,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset @@ -321,7 +321,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct @@ -351,7 +351,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index f0db9c777f..f26f3f98b5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -8,7 +8,10 @@ allowedsemrels the allowed semantics - + + startFrom + undelete + @@ -22,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index 18c5f4f0f6..aa5357eea3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -8,6 +8,10 @@ baseURL the baseURL from where to reach the community APIs + + startFrom + undelete + @@ -21,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml index 01e366c02f..0ceee5a7ec 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -8,6 +8,10 @@ baseURL the base URL to use to select the right community APIs + + startFrom + undelete + @@ -21,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -94,7 +106,7 @@ yarn cluster - community2resultfromorganization - move results + move results eu.dnetlib.dhp.MoveResult dhp-enrichment-${projectVersion}.jar diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml index 773c7fba76..b5e6fbf057 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -16,9 +16,21 @@ outputPath the output path + + startFrom + undelete + - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -209,9 +221,9 @@ dhp-enrichment-${projectVersion}.jar --executor-cores=6 - --executor-memory=5G - --conf spark.executor.memoryOverhead=3g - --conf spark.sql.shuffle.partitions=3284 + --executor-memory=4G + --conf spark.executor.memoryOverhead=5G + --conf spark.sql.shuffle.partitions=15000 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -324,7 +336,34 @@ - + + + + + yarn + cluster + move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitysemrel/ + --outputPath${sourcePath}/ + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index e963453da9..ca76a0e85b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -8,6 +8,10 @@ blacklist The list of institutional repositories that should not be used for the propagation + + startFrom + undelete + @@ -21,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 82e9e262ee12e4cd55f1f8593893fc8e41b82a07 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 17 Jan 2024 17:38:03 +0100 Subject: [PATCH 31/56] [enrichment single step] remove parameter from execution --- .../SparkResultToProjectThroughSemRelJob.java | 29 +++++++++---------- .../eu/dnetlib/dhp/wf/main/job.properties | 2 +- .../projecttoresult/oozie_app/workflow.xml | 9 ------ 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index e7518673d8..a6466716a7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -64,7 +64,7 @@ public class SparkResultToProjectThroughSemRelJob { removeOutputDir(spark, outputPath); } execPropagation( - spark, outputPath, alreadyLinkedPath, potentialUpdatePath, 
saveGraph); + spark, outputPath, alreadyLinkedPath, potentialUpdatePath); }); } @@ -72,24 +72,23 @@ public class SparkResultToProjectThroughSemRelJob { SparkSession spark, String outputPath, String alreadyLinkedPath, - String potentialUpdatePath, - Boolean saveGraph) { + String potentialUpdatePath) { Dataset toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class); - if (saveGraph) { - toaddrelations - .joinWith( - alreadyLinked, - toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")), - "left_outer") - .flatMap(mapRelationRn(), Encoders.bean(Relation.class)) - .write() - .mode(SaveMode.Append) - .option("compression", "gzip") - .json(outputPath); - } + // if (saveGraph) { + toaddrelations + .joinWith( + alreadyLinked, + toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")), + "left_outer") + .flatMap(mapRelationRn(), Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + // } } private static FlatMapFunction, Relation> mapRelationRn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 7e82d9b2c4..05db040903 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched -resumeFrom=CommunitySemanticRelation +resumeFrom=ResultProject allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index f26f3f98b5..21cc5522f8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -97,17 +97,8 @@ --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked - - - - - - - - - \ No newline at end of file From a12a3eb143477271c7682877c793864fc6aa442f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Jan 2024 15:18:10 +0100 Subject: [PATCH 32/56] - --- .../dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java index 5ffcf8d3f8..c71ccb4391 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java @@ -30,7 +30,7 @@ public class MoveResult implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - SparkResultToCommunityFromOrganizationJob.class + MoveResult.class .getResourceAsStream( 
"/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json")); From 6af536541d5187b6d162a456f8d8c9fa455220ad Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Jan 2024 15:35:40 +0100 Subject: [PATCH 33/56] [enrichment single step] moving parameter file in correct location --- .../input_moveresult_parameters.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/{resulttocommunityfromorganization => }/input_moveresult_parameters.json (100%) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_moveresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json From bcc0a13981c61d25c073d3b497f83e52121b066b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Jan 2024 17:39:14 +0100 Subject: [PATCH 34/56] [enrichment single step] adding element in wf definition --- .../dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml index 21cc5522f8..287ee4ba80 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -30,7 +30,6 @@ ${wf:conf('startFrom') eq 'undelete'} - @@ -101,4 +100,5 @@ + \ No newline at end of file From c6b3401596f9e05cf980af479f156a3a10a2d9ae Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Jan 2024 10:15:39 +0100 Subject: [PATCH 35/56] increased shuffle partitions for publications in the country propagation workflow --- .../wf/subworkflows/countrypropagation/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 3a6e3edfb6..b9cf695176 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -233,7 +233,7 @@ --conf spark.speculation=false --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --sourcePath${sourcePath}/publication --preparedInfoPath${workingDir}/country/publication From 2655eea5bc3075d4a649958c61971586db25452d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 19 Jan 2024 16:28:05 +0100 Subject: [PATCH 36/56] [orcid enrichment] drop paths before copying the non-modifyed 
contents --- .../dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml index ce117b5e9a..bbd3581c57 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml @@ -43,6 +43,17 @@ --graphPath${graphPath} --masteryarn + + + + + + + + + + + From 1c6db320f41882c34299e7c346d1c95d592d2644 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 22 Jan 2024 15:53:17 +0100 Subject: [PATCH 37/56] [graph provision] obtain context info from the context API instead from the ISLookUp service --- .../common/api/context/CategorySummary.java | 39 ++++++++++++++ .../api/context/CategorySummaryList.java | 7 +++ .../common/api/context/ConceptSummary.java | 52 +++++++++++++++++++ .../api/context/ConceptSummaryList.java | 7 +++ .../common/api/context/ContextSummary.java | 50 ++++++++++++++++++ .../api/context/ContextSummaryList.java | 7 +++ .../dhp/oa/provision/XmlConverterJob.java | 6 +-- .../dhp/oa/provision/utils/ContextMapper.java | 45 +++++++++++++++- .../dhp/oa/provision/oozie_app/workflow.xml | 6 ++- 9 files changed, 213 insertions(+), 6 deletions(-) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java new file mode 100644 index 0000000000..fff28dbdfd --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java @@ -0,0 +1,39 @@ + +package eu.dnetlib.dhp.common.api.context; + +public class CategorySummary { + + private String id; + + private String label; + + private boolean hasConcept; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public boolean isHasConcept() { + return hasConcept; + } + + public CategorySummary setId(final String id) { + this.id = id; + return this; + } + + public CategorySummary setLabel(final String label) { + this.label = label; + return this; + } + + public CategorySummary setHasConcept(final boolean hasConcept) { + this.hasConcept = hasConcept; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java new file mode 100644 index 0000000000..7213a945a6 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class CategorySummaryList extends ArrayList { +} diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java new file mode 100644 index 0000000000..a576f9a1e1 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.List; + +public class ConceptSummary { + + private String id; + + private String label; + + public boolean hasSubConcept; + + private List concepts; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public List getConcepts() { + return concepts; + } + + public ConceptSummary setId(final String id) { + this.id = id; + return this; + } + + public ConceptSummary setLabel(final String label) { + this.label = label; + return this; + } + + public boolean isHasSubConcept() { + return hasSubConcept; + } + + public ConceptSummary setHasSubConcept(final boolean hasSubConcept) { + this.hasSubConcept = hasSubConcept; + return this; + } + + public ConceptSummary setConcept(final List concepts) { + this.concepts = concepts; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java new file mode 100644 index 0000000000..45ccd28109 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class ConceptSummaryList extends ArrayList { +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java new file mode 100644 index 0000000000..46a0d0d5ad --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.dhp.common.api.context; + +public class ContextSummary { + + private String id; + + private String label; + + private String type; + + private String status; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public String getType() { + return type; + } + + public String getStatus() { + return status; + } + + public ContextSummary setId(final String id) { + this.id = id; + return this; + } + + public ContextSummary setLabel(final String label) { + this.label = label; + return this; + } + + public ContextSummary setType(final String type) { + this.type = type; + return this; + } + + public ContextSummary setStatus(final String status) { + this.status = status; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java new file mode 100644 index 0000000000..6186000077 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class ContextSummaryList extends ArrayList { +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index 518f411204..6f43ca3f72 100644 --- 
a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -62,8 +62,8 @@ public class XmlConverterJob { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String isLookupUrl = parser.get("isLookupUrl"); - log.info("isLookupUrl: {}", isLookupUrl); + final String contextApiBaseUrl = parser.get("contextApiBaseUrl"); + log.info("contextApiBaseUrl: {}", contextApiBaseUrl); final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); @@ -71,7 +71,7 @@ public class XmlConverterJob { runWithSparkSession(conf, isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromAPI(contextApiBaseUrl)); }); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index bcaf406039..96d92fed6e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -1,18 +1,22 @@ package eu.dnetlib.dhp.oa.provision.utils; -import java.io.Serializable; -import java.io.StringReader; +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; import java.util.HashMap; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.jetbrains.annotations.NotNull; import org.xml.sax.SAXException; import com.google.common.base.Joiner; +import eu.dnetlib.dhp.common.api.context.*; +import eu.dnetlib.dhp.common.rest.DNetRestClient; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -23,6 +27,42 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; + public static ContextMapper fromAPI(final String baseURL) throws Exception { + + final ContextMapper contextMapper = new ContextMapper(); + + for (ContextSummary ctx : DNetRestClient.doGET(baseURL + "/contexts", ContextSummaryList.class)) { + + contextMapper.put(ctx.getId(), new ContextDef(ctx.getId(), ctx.getLabel(), "context", ctx.getType())); + + for (CategorySummary cat : DNetRestClient + .doGET(baseURL + "/context/" + ctx.getId(), CategorySummaryList.class)) { + contextMapper.put(cat.getId(), new ContextDef(cat.getId(), cat.getLabel(), "category", "")); + if (cat.isHasConcept()) { + for (ConceptSummary c : DNetRestClient + .doGET(baseURL + "/context/category/" + cat.getId(), ConceptSummaryList.class)) { + contextMapper.put(c.getId(), new ContextDef(c.getId(), c.getLabel(), "concept", "")); + if (c.isHasSubConcept()) { + for (ConceptSummary cs : c.getConcepts()) { + contextMapper.put(cs.getId(), new ContextDef(cs.getId(), cs.getLabel(), "concept", "")); + if (cs.isHasSubConcept()) { + for (ConceptSummary css : cs.getConcepts()) { + contextMapper + .put( + css.getId(), + new 
ContextDef(css.getId(), css.getLabel(), "concept", "")); + } + } + } + } + } + } + } + } + return contextMapper; + } + + @Deprecated public static ContextMapper fromIS(final String isLookupUrl) throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -32,6 +72,7 @@ public class ContextMapper extends HashMap implements Serial return fromXml(sb.toString()); } + @Deprecated public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper contextMapper = new ContextMapper(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 2e7b11ddee..9eab960f07 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -9,6 +9,10 @@ isLookupUrl URL for the isLookup service + + contextApiBaseUrl + context API URL + relPartitions number or partitions for the relations Dataset @@ -589,7 +593,7 @@ --inputPath${workingDir}/join_entities --outputPath${workingDir}/xml - --isLookupUrl${isLookupUrl} + --contextApiBaseUrl${contextApiBaseUrl} From 6fd25cf549e3892d3d1f114848367ea00dd84399 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 08:47:12 +0100 Subject: [PATCH 38/56] code formatting --- .../eu/dnetlib/dhp/oozie/RunSQLSparkJob.java | 18 +- .../dhp/oa/dedup/DedupRecordFactory.java | 284 +++++++++--------- .../dhp/oa/dedup/SparkCreateMergeRels.java | 5 +- 3 files changed, 158 insertions(+), 149 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java index ef296bfc90..027bf0735d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java @@ -1,13 +1,7 @@ package eu.dnetlib.dhp.oozie; -import com.google.common.io.Resources; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import org.apache.commons.lang3.time.DurationFormatUtils; -import org.apache.commons.text.StringSubstitutor; -import org.apache.spark.SparkConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -15,7 +9,15 @@ import java.util.HashMap; import java.util.Map; import java.util.Optional; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.commons.text.StringSubstitutor; +import org.apache.spark.SparkConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Resources; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class RunSQLSparkJob { private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 4c12d1dc65..eddfba309d 100644 --- 
a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,6 +1,16 @@ package eu.dnetlib.dhp.oa.dedup; +import java.util.*; +import java.util.stream.Stream; + +import org.apache.commons.beanutils.BeanUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.ReduceFunction; +import org.apache.spark.sql.*; + import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -8,180 +18,176 @@ import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Result; -import org.apache.commons.beanutils.BeanUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.ReduceFunction; -import org.apache.spark.sql.*; import scala.Tuple2; import scala.Tuple3; import scala.collection.JavaConversions; -import java.util.*; -import java.util.stream.Stream; - public class DedupRecordFactory { - public static final class DedupRecordReduceState { - public final String dedupId; + public static final class DedupRecordReduceState { + public final String dedupId; - public final ArrayList aliases = new ArrayList<>(); + public final ArrayList aliases = new ArrayList<>(); - public final HashSet acceptanceDate = new HashSet<>(); + public final HashSet acceptanceDate = new HashSet<>(); - public OafEntity entity; + public OafEntity entity; - public DedupRecordReduceState(String dedupId, String id, OafEntity entity) { - this.dedupId = dedupId; - this.entity = entity; - if (entity == null) { - aliases.add(id); - } else { - if (Result.class.isAssignableFrom(entity.getClass())) { - Result result = (Result) entity; - if (result.getDateofacceptance() != null && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { - acceptanceDate.add(result.getDateofacceptance().getValue()); - } - } - } - } + public DedupRecordReduceState(String dedupId, String id, OafEntity entity) { + this.dedupId = dedupId; + this.entity = entity; + if (entity == null) { + aliases.add(id); + } else { + if (Result.class.isAssignableFrom(entity.getClass())) { + Result result = (Result) entity; + if (result.getDateofacceptance() != null + && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { + acceptanceDate.add(result.getDateofacceptance().getValue()); + } + } + } + } - public String getDedupId() { - return dedupId; - } - } - private static final int MAX_ACCEPTANCE_DATE = 20; + public String getDedupId() { + return dedupId; + } + } - private DedupRecordFactory() { - } + private static final int MAX_ACCEPTANCE_DATE = 20; - public static Dataset createDedupRecord( - final SparkSession spark, - final DataInfo dataInfo, - final String mergeRelsInputPath, - final String entitiesInputPath, - final Class clazz) { + private DedupRecordFactory() { + } - final long ts = System.currentTimeMillis(); - final Encoder beanEncoder = Encoders.bean(clazz); - final Encoder kryoEncoder = Encoders.kryo(clazz); + public static Dataset createDedupRecord( + final SparkSession spark, + final DataInfo dataInfo, + final String 
mergeRelsInputPath, + final String entitiesInputPath, + final Class clazz) { - // - Dataset entities = spark - .read() - .schema(Encoders.bean(clazz).schema()) - .json(entitiesInputPath) - .as(beanEncoder) - .map( - (MapFunction>) entity -> { - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), kryoEncoder)) - .selectExpr("_1 AS id", "_2 AS kryoObject"); + final long ts = System.currentTimeMillis(); + final Encoder beanEncoder = Encoders.bean(clazz); + final Encoder kryoEncoder = Encoders.kryo(clazz); - // : source is the dedup_id, target is the id of the mergedIn - Dataset mergeRels = spark - .read() - .load(mergeRelsInputPath) - .where("relClass == 'merges'") - .selectExpr("source as dedupId", "target as id"); + // + Dataset entities = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(entitiesInputPath) + .as(beanEncoder) + .map( + (MapFunction>) entity -> { + return new Tuple2<>(entity.getId(), entity); + }, + Encoders.tuple(Encoders.STRING(), kryoEncoder)) + .selectExpr("_1 AS id", "_2 AS kryoObject"); - return mergeRels - .join(entities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") - .select("dedupId", "id", "kryoObject") - .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) - .map((MapFunction, DedupRecordReduceState>) t -> new DedupRecordReduceState(t._1(), t._2(), t._3()), Encoders.kryo(DedupRecordReduceState.class)) - .groupByKey((MapFunction) DedupRecordReduceState::getDedupId, Encoders.STRING()) - .reduceGroups( - (ReduceFunction) (t1, t2) -> { - if (t1.entity == null) { - t2.aliases.addAll(t1.aliases); - return t2; - } - if (t1.acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { - t1.acceptanceDate.addAll(t2.acceptanceDate); - } - t1.aliases.addAll(t2.aliases); - t1.entity = reduceEntity(t1.entity, t2.entity); + // : source is the dedup_id, target is the id of the mergedIn + Dataset mergeRels = spark + .read() + .load(mergeRelsInputPath) + .where("relClass == 'merges'") + .selectExpr("source as dedupId", "target as id"); - return t1; - } - ) - .flatMap - ((FlatMapFunction, OafEntity>) t -> { - String dedupId = t._1(); - DedupRecordReduceState agg = t._2(); + return mergeRels + .join(entities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .select("dedupId", "id", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) + .map( + (MapFunction, DedupRecordReduceState>) t -> new DedupRecordReduceState( + t._1(), t._2(), t._3()), + Encoders.kryo(DedupRecordReduceState.class)) + .groupByKey( + (MapFunction) DedupRecordReduceState::getDedupId, Encoders.STRING()) + .reduceGroups( + (ReduceFunction) (t1, t2) -> { + if (t1.entity == null) { + t2.aliases.addAll(t1.aliases); + return t2; + } + if (t1.acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { + t1.acceptanceDate.addAll(t2.acceptanceDate); + } + t1.aliases.addAll(t2.aliases); + t1.entity = reduceEntity(t1.entity, t2.entity); - if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) { - return Collections.emptyIterator(); - } + return t1; + }) + .flatMap((FlatMapFunction, OafEntity>) t -> { + String dedupId = t._1(); + DedupRecordReduceState agg = t._2(); - return Stream.concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) - .map(id -> { - try { - OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity); - res.setId(id); - res.setDataInfo(dataInfo); - res.setLastupdatetimestamp(ts); - return res; - } catch (Exception e) { - throw new RuntimeException(e); - } - }).iterator(); - }, 
beanEncoder); - } + if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) { + return Collections.emptyIterator(); + } - private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { + return Stream + .concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) + .map(id -> { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity); + res.setId(id); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + }) + .iterator(); + }, beanEncoder); + } + + private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { if (duplicate == null) { return entity; } + int compare = new IdentifierComparator<>() + .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); - int compare = new IdentifierComparator<>() - .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); - - if (compare > 0) { + if (compare > 0) { OafEntity swap = duplicate; - duplicate = entity; - entity = swap; - } + duplicate = entity; + entity = swap; + } - entity.mergeFrom(duplicate); + entity.mergeFrom(duplicate); - if (ModelSupport.isSubClass(duplicate, Result.class)) { - Result re = (Result) entity; - Result rd = (Result) duplicate; + if (ModelSupport.isSubClass(duplicate, Result.class)) { + Result re = (Result) entity; + Result rd = (Result) duplicate; - List> authors = new ArrayList<>(); - if (re.getAuthor() != null) { - authors.add(re.getAuthor()); - } - if (rd.getAuthor() != null) { - authors.add(rd.getAuthor()); - } + List> authors = new ArrayList<>(); + if (re.getAuthor() != null) { + authors.add(re.getAuthor()); + } + if (rd.getAuthor() != null) { + authors.add(rd.getAuthor()); + } - re.setAuthor(AuthorMerger.merge(authors)); - } + re.setAuthor(AuthorMerger.merge(authors)); + } - return entity; - } + return entity; + } - public static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) { - T base = entities.next()._2(); + public static T entityMerger( + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) { + T base = entities.next()._2(); - while (entities.hasNext()) { - T duplicate = entities.next()._2(); - if (duplicate != null) - base = (T) reduceEntity(base, duplicate); - } + while (entities.hasNext()) { + T duplicate = entities.next()._2(); + if (duplicate != null) + base = (T) reduceEntity(base, duplicate); + } - base.setId(id); - base.setDataInfo(dataInfo); - base.setLastupdatetimestamp(ts); + base.setId(id); + base.setDataInfo(dataInfo); + base.setLastupdatetimestamp(ts); - return base; - } + return base; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 191870d3b0..59626c1414 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -242,13 +242,14 @@ public class SparkCreateMergeRels extends AbstractSparkAction { // this was a pivot in a previous graph but it has been merged into a new group with different // pivot - if (!r.isNullAt(r.fieldIndex("lastUsage")) && !pivot.equals(id) && !dedupId.equals(pivotDedupId)) { + if (!r.isNullAt(r.fieldIndex("lastUsage")) && !pivot.equals(id) + && !dedupId.equals(pivotDedupId)) { // materialize the previous dedup record as a merge 
relation with the new one res.add(new Tuple3<>(dedupId, pivotDedupId, null)); } // add merge relations - if (cut <=0 || r.getAs("position") <= cut) { + if (cut <= 0 || r. getAs("position") <= cut) { res.add(new Tuple3<>(id, pivotDedupId, pivot)); } From f87f3a6483d1ea18945cd8055a3b97a4973b682a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 08:54:37 +0100 Subject: [PATCH 39/56] [graph provision] updated param specification for the XML converter job --- .../dhp/oa/provision/input_params_xml_converter.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index eda6154d7e..653a69ed11 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -12,9 +12,9 @@ "paramRequired": true }, { - "paramName": "ilu", - "paramLongName": "isLookupUrl", - "paramDescription": "URL of the isLookUp Service", + "paramName": "cau", + "paramLongName": "contextApiBaseUrl", + "paramDescription": "URL of the context API", "paramRequired": true } ] From 3e96777cc4ce5896a0c1f1af5b5adf00546fec04 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 15:21:03 +0100 Subject: [PATCH 40/56] [collection] increased logging from the oai-pmh metadata collection process --- .../dhp/common/collection/HttpConnector2.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 905457bcd0..08cc3ec595 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -8,10 +8,13 @@ import java.io.InputStream; import java.net.*; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.lang3.time.DateUtils; import org.apache.http.HttpHeaders; +import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,6 +101,7 @@ public class HttpConnector2 { InputStream input = null; + long start = System.currentTimeMillis(); try { if (getClientParams().getRequestDelay() > 0) { backoffAndSleep(getClientParams().getRequestDelay()); @@ -115,9 +119,8 @@ public class HttpConnector2 { urlConn.addRequestProperty(headerEntry.getKey(), headerEntry.getValue()); } } - if (log.isDebugEnabled()) { - logHeaderFields(urlConn); - } + + logHeaderFields(urlConn); int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); @@ -167,12 +170,14 @@ public class HttpConnector2 { .warn( "{} - waiting and repeating request after default delay of {} sec.", requestUrl, getClientParams().getRetryDelay()); - backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); + backoffAndSleep(retryNumber * getClientParams().getRetryDelay()); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); urlConn.disconnect(); return 
attemptDownload(requestUrl, retryNumber + 1, report); default: + log.error("got error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL()); + log.error("response message: {}", urlConn.getResponseMessage()); report .put( REPORT_PREFIX + urlConn.getResponseCode(), @@ -196,16 +201,21 @@ report.put(e.getClass().getName(), e.getMessage()); backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); return attemptDownload(requestUrl, retryNumber + 1, report); + } finally { + log + .info( + "request time elapsed: {}sec", + TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); } } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: {}", urlConn.getResponseMessage()); + log.info("StatusCode: {}", urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: {} - value: {}", e.getKey(), v); + log.info(" key: {} - value: {}", e.getKey(), v); } } } From 9b13c22e5d9a6d916f53be71400456712397ebaf Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 23 Jan 2024 15:36:08 +0100 Subject: [PATCH 41/56] [graph provision] retrieve all the context information by adding all=true to the requests issued to the API --- .../eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index 96d92fed6e..083dbe988f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -31,16 +31,19 @@ public class ContextMapper extends HashMap implements Serial final ContextMapper contextMapper = new ContextMapper(); - for (ContextSummary ctx : DNetRestClient.doGET(baseURL + "/contexts", ContextSummaryList.class)) { + for (ContextSummary ctx : DNetRestClient + .doGET(String.format("%s/contexts", baseURL), ContextSummaryList.class)) { contextMapper.put(ctx.getId(), new ContextDef(ctx.getId(), ctx.getLabel(), "context", ctx.getType())); for (CategorySummary cat : DNetRestClient - .doGET(baseURL + "/context/" + ctx.getId(), CategorySummaryList.class)) { + .doGET(String.format("%s/context/%s?all=true", baseURL, ctx.getId()), CategorySummaryList.class)) { contextMapper.put(cat.getId(), new ContextDef(cat.getId(), cat.getLabel(), "category", "")); if (cat.isHasConcept()) { for (ConceptSummary c : DNetRestClient - .doGET(baseURL + "/context/category/" + cat.getId(), ConceptSummaryList.class)) { + .doGET( + String.format("%s/context/category/%s?all=true", baseURL, cat.getId()), + ConceptSummaryList.class)) { contextMapper.put(c.getId(), new ContextDef(c.getId(), c.getLabel(), "concept", "")); if (c.isHasSubConcept()) { for (ConceptSummary cs : c.getConcepts()) { From 2c1e6849f0a43c28c58907829eb6a3f060f48f2c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 24 Jan 2024 10:36:41 +0100 Subject: [PATCH 42/56] added code of conduct and contributing files --- CODE_OF_CONDUCT.md | 43 +++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 9 +++++++++ LICENSE => LICENSE.md | 0 README.md | 5 +++++ 4 files changed, 57 insertions(+) create mode 100644 CODE_OF_CONDUCT.md create 
mode 100644 CONTRIBUTING.md rename LICENSE => LICENSE.md (100%) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..aff151f945 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,43 @@ +# Contributor Code of Conduct + +Openness, transparency and our community-driven participatory approach guide us in our day-to-day interactions and decision-making. Our open source projects are no exception. Trust, respect, collaboration and transparency are core values we believe should live and breathe within our projects. Our community welcomes participants from around the world with different experiences, unique perspectives, and great ideas to share. + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Attempting collaboration before conflict +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- Violence, threats of violence, or inciting others to commit self-harm +- The use of sexualized language or imagery and unwelcome sexual attention or advances +- Trolling, intentionally spreading misinformation, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic address, without explicit permission +- Abuse of the reporting process to intentionally harass or exclude others +- Advocating for, or encouraging, any of the above behavior +- Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), [version 1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html). 
\ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..6d83ebbcca --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,9 @@ +# Contributing to D-Net Hadoop + +:+1::tada: First off, thanks for taking the time to contribute! :tada::+1: + +This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). + +The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. + +All contributions are welcome, all contributions will be considered to be contributed under the [project license](#LICENSE.md). diff --git a/LICENSE b/LICENSE.md similarity index 100% rename from LICENSE rename to LICENSE.md diff --git a/README.md b/README.md index 2c1440f44e..b6575814d5 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,11 @@ Dnet-hadoop is the project that defined all the [OOZIE workflows](https://oozie.apache.org/) for the OpenAIRE Graph construction, processing, provisioning. +This project adheres to the Contributor Covenant [code of conduct](CODE_OF_CONDUCT.md). +By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). + +This project is licensed under the [AGPL v3 or later version](#LICENSE.md). + How to build, package and run oozie workflows ==================== From 0c97a3a81a55cbdc24342d88d3862a89da1a6c5c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 24 Jan 2024 10:56:33 +0100 Subject: [PATCH 43/56] minor --- CONTRIBUTING.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6d83ebbcca..34a26f9133 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,6 +4,7 @@ This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). -The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. +The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules, which apply to this project as a whole, including all its sub-modules. +Use your best judgment, and feel free to propose changes to this document in a pull request. All contributions are welcome, all contributions will be considered to be contributed under the [project license](#LICENSE.md). From 2838a9b63086493c5d845728336438d01595f56b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 24 Jan 2024 16:07:05 +0100 Subject: [PATCH 44/56] Update 'CONTRIBUTING.md' --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 34a26f9133..13a359c865 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,4 +7,4 @@ This project and everyone participating in it is governed by our [Code of Conduc The following is a set of guidelines for contributing to this project and its packages.
These are mostly guidelines, not rules, which apply to this project as a whole, including all its sub-modules. Use your best judgment, and feel free to propose changes to this document in a pull request. -All contributions are welcome, all contributions will be considered to be contributed under the [project license](#LICENSE.md). +All contributions are welcome, all contributions will be considered to be contributed under the [project license](LICENSE.md). From a7115cfa9e595c7db1b41cf9a34b0ae72a08d620 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 25 Jan 2024 15:06:34 +0100 Subject: [PATCH 45/56] max mem of joins (hive.mapjoin.followby.gby.localtask.max.memory.usage) now 80%, up from 55%. --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index cbf97944dc..f15f223209 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -78,6 +78,10 @@ <name>hive.txn.timeout</name> <value>${hive_timeout}</value> </property> + <property> + <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name> + <value>0.80</value> + </property> <property> <name>mapred.job.queue.name</name> <value>analytics</value>
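For context on the knob being raised: hive.mapjoin.followby.gby.localtask.max.memory.usage caps the fraction of memory the local in-memory hash table of a map join may occupy when the join is followed by a group by; beyond that cap the local task is aborted. The patch applies the value through the Oozie workflow configuration above; purely as an illustration (the class name below is invented, and hive-common must be on the classpath), the same property could be set programmatically:

import org.apache.hadoop.hive.conf.HiveConf;

// Illustrative sketch only: the repository sets this value in workflow.xml,
// not in Java code. HiveConf extends Hadoop's Configuration, hence setFloat.
public class HiveJoinMemorySketch {
	public static void main(String[] args) {
		HiveConf conf = new HiveConf();
		// allow the local map-join hash table (followed by group by) up to 80% of memory
		conf.setFloat("hive.mapjoin.followby.gby.localtask.max.memory.usage", 0.80f);
		System.out.println(conf.get("hive.mapjoin.followby.gby.localtask.max.memory.usage"));
	}
}

From 9e8fc6aa88d592fc3cb354bc36894b95679b5092 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 26 Jan 2024 09:17:20 +0100 Subject: [PATCH 46/56] [collection] increased logging from the oai-pmh metadata collection process --- .../dhp/common/collection/HttpConnector2.java | 63 ++++++++++++++----- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 08cc3ec595..342d73cdc2 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -97,8 +97,6 @@ public class HttpConnector2 { throw new CollectorException(msg); } - log.info("Request attempt {} [{}]", retryNumber, requestUrl); - InputStream input = null; long start = System.currentTimeMillis(); @@ -106,6 +104,9 @@ public class HttpConnector2 { if (getClientParams().getRequestDelay() > 0) { backoffAndSleep(getClientParams().getRequestDelay()); } + + log.info("Request attempt {} [{}]", retryNumber, requestUrl); + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); urlConn.setInstanceFollowRedirects(false); urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); @@ -135,9 +136,7 @@ public class HttpConnector2 { } if (is2xx(urlConn.getResponseCode())) { - input = urlConn.getInputStream(); - responseType = urlConn.getContentType(); - return input; + return getInputStream(urlConn, start); } if (is3xx(urlConn.getResponseCode())) { // REDIRECTS @@ -147,6 +146,7 @@ public class HttpConnector2 { .put( REPORT_PREFIX + urlConn.getResponseCode(), String.format("Moved to: %s", newUrl)); + logRequestTime(start); urlConn.disconnect(); if (retryAfter > 0) { backoffAndSleep(retryAfter); @@ -162,19 +162,39 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - "{} - waiting and repeating request after suggested retry-after {} sec.", - requestUrl, retryAfter); + "waiting and repeating request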
after suggested retry-after {} sec for URL {}", + retryAfter, requestUrl); backoffAndSleep(retryAfter * 1000); } else { log .warn( - "{} - waiting and repeating request after default delay of {} sec.", - requestUrl, getClientParams().getRetryDelay()); + "waiting and repeating request after default delay of {} sec for URL {}", + getClientParams().getRetryDelay(), requestUrl); backoffAndSleep(retryNumber * getClientParams().getRetryDelay()); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + + logRequestTime(start); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + case 422: // UNPROCESSABLE ENTITY + report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + log.warn("waiting and repeating request after 10 sec for URL {}", requestUrl); + backoffAndSleep(10000); + urlConn.disconnect(); + logRequestTime(start); + try { + return getInputStream(urlConn, start); + } catch (IOException e) { + log + .error( + "server returned 422 and got IOException accessing the response body from URL {}", + requestUrl); + log.error("IOException:", e); + return attemptDownload(requestUrl, retryNumber + 1, report); + } default: log.error("got error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL()); log.error("response message: {}", urlConn.getResponseMessage()); @@ -184,6 +204,8 @@ String .format( "%s Error: %s", requestUrl, urlConn.getResponseMessage())); + logRequestTime(start); + urlConn.disconnect(); throw new CollectorException(urlConn.getResponseCode() + " error " + report); } } @@ -201,16 +223,25 @@ report.put(e.getClass().getName(), e.getMessage()); backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); return attemptDownload(requestUrl, retryNumber + 1, report); - } finally { - log - .info( - "request time elapsed: {}sec", - TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); } } + private InputStream getInputStream(HttpURLConnection urlConn, long start) throws IOException { + InputStream input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + logRequestTime(start); + return input; + } + + private static void logRequestTime(long start) { + log .info( "request time elapsed: {}sec", TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); } + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.info("StatusCode: {}", urlConn.getResponseMessage()); + log.info("Response: {} - {}", urlConn.getResponseCode(), urlConn.getResponseMessage()); for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { @@ -235,7 +266,7 @@ for (String key : headerMap.keySet()) { if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { - return Integer.parseInt(headerMap.get(key).get(0)) + 10; + return Integer.parseInt(headerMap.get(key).get(0)); } } return -1;
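Taken together, the hunks above settle on one retry policy: honour the server's numeric Retry-After hint verbatim when present (the former +10 seconds padding is dropped), otherwise scale the configured default delay by the attempt number, and log the elapsed time of every outcome. Below is a minimal, self-contained sketch of that policy; the names (RetryPolicySketch, retryAfterSeconds, delayMillis) are invented for the example, only the numeric form of Retry-After is handled, and the repository keeps the real logic inside HttpConnector2.

import java.net.HttpURLConnection;

// Hedged sketch, not the project's HttpConnector2: it only shows how a numeric
// Retry-After header can drive the pause between attempts, as the patch does.
public class RetryPolicySketch {

	// Seconds suggested by the server, or -1 when the header is absent or not numeric.
	static int retryAfterSeconds(HttpURLConnection conn) {
		String value = conn.getHeaderField("Retry-After");
		if (value == null) {
			return -1;
		}
		try {
			return Integer.parseInt(value.trim());
		} catch (NumberFormatException e) {
			return -1; // the HTTP-date form of Retry-After is out of scope here
		}
	}

	// Pause before attempt retryNumber: server hint first, else a growing default.
	static long delayMillis(int retryAfterSec, int retryNumber, int defaultDelaySec) {
		int seconds = retryAfterSec > 0 ? retryAfterSec : retryNumber * defaultDelaySec;
		return seconds * 1000L;
	}
}

From e889808daa889530893bab370442811f4dd9dc4f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 26 Jan 2024 12:19:04 +0100 Subject: [PATCH 47/56] Fixed problem on missing author in crossref Mapping --- .../scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 6 +++--- .../eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json | 1 + .../dhp/doiboost/crossref/CrossrefMappingTest.scala | 7 +++++++ 3 files changed, 11 insertions(+), 3 deletions(-) create mode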
100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index ee857e2c4c..64090733d7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -25,7 +25,7 @@ case class mappingAffiliation(name: String) {} case class mappingAuthor( given: Option[String], - family: String, + family: Option[String], sequence: Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation] @@ -226,14 +226,14 @@ case object Crossref2Oaf { //Mapping Author val authorList: List[mappingAuthor] = - (json \ "author").extractOrElse[List[mappingAuthor]](List()) + (json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined) val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") ) result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => - generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) + generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index) }.asJava) // Mapping instance diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json new file mode 100644 index 0000000000..8e75f35866 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json @@ -0,0 +1 @@ +{"indexed":{"date-parts":[[2023,12,29]],"date-time":"2023-12-29T10:40:34Z","timestamp":1703846434800},"reference-count":65,"publisher":"Springer Science and Business Media LLC","license":[{"start":{"date-parts":[[2023,2,9]],"date-time":"2023-02-09T00:00:00Z","timestamp":1675900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,9]],"date-time":"2023-02-09T00:00:00Z","timestamp":1675900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat. Phys."],"DOI":"10.1038\/s41567-022-01757-y","type":"journal-article","created":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T15:32:19Z","timestamp":1676043139000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Observation of electroweak production of two jets and a Z-boson pair"],"prefix":"10.1038","author":[{"name":"ATLAS Collaboration","sequence":"first","affiliation":[]},{"given":"G.","family":"Aad","sequence":"additional","affiliation":[]},{"given":"B.","family":"Abbott","sequence":"additional","affiliation":[]},{"given":"D. C.","family":"Abbott","sequence":"additional","affiliation":[]},{"given":"A.","family":"Abed Abud","sequence":"additional","affiliation":[]},{"given":"K.","family":"Abeling","sequence":"additional","affiliation":[]},{"given":"D. K.","family":"Abhayasinghe","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Abidi","sequence":"additional","affiliation":[]},{"given":"O. 
S.","family":"AbouZeid","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Abraham","sequence":"additional","affiliation":[]},{"given":"H.","family":"Abramowicz","sequence":"additional","affiliation":[]},{"given":"H.","family":"Abreu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Abulaiti","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Acharya","sequence":"additional","affiliation":[]},{"given":"B.","family":"Achkar","sequence":"additional","affiliation":[]},{"given":"S.","family":"Adachi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adam","sequence":"additional","affiliation":[]},{"given":"C. Adam","family":"Bourdarios","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adamczyk","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adamek","sequence":"additional","affiliation":[]},{"given":"J.","family":"Adelman","sequence":"additional","affiliation":[]},{"given":"M.","family":"Adersberger","sequence":"additional","affiliation":[]},{"given":"A.","family":"Adiguzel","sequence":"additional","affiliation":[]},{"given":"S.","family":"Adorni","sequence":"additional","affiliation":[]},{"given":"T.","family":"Adye","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Affolder","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Afik","sequence":"additional","affiliation":[]},{"given":"C.","family":"Agapopoulou","sequence":"additional","affiliation":[]},{"given":"M. N.","family":"Agaras","sequence":"additional","affiliation":[]},{"given":"A.","family":"Aggarwal","sequence":"additional","affiliation":[]},{"given":"C.","family":"Agheorghiesei","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Aguilar-Saavedra","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ahmadov","sequence":"additional","affiliation":[]},{"given":"W. S.","family":"Ahmed","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ai","sequence":"additional","affiliation":[]},{"given":"G.","family":"Aielli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Akatsuka","sequence":"additional","affiliation":[]},{"given":"T. P. A.","family":"\u00c5kesson","sequence":"additional","affiliation":[]},{"given":"E.","family":"Akilli","sequence":"additional","affiliation":[]},{"given":"A. V.","family":"Akimov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Al Khoury","sequence":"additional","affiliation":[]},{"given":"G. L.","family":"Alberghi","sequence":"additional","affiliation":[]},{"given":"J.","family":"Albert","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Alconada Verzini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Alderweireldt","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aleksa","sequence":"additional","affiliation":[]},{"given":"I. 
N.","family":"Aleksandrov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Alexa","sequence":"additional","affiliation":[]},{"given":"T.","family":"Alexopoulos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Alfonsi","sequence":"additional","affiliation":[]},{"given":"F.","family":"Alfonsi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Alhroob","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ali","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aliev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Alimonti","sequence":"additional","affiliation":[]},{"given":"C.","family":"Allaire","sequence":"additional","affiliation":[]},{"given":"B. M. M.","family":"Allbrooke","sequence":"additional","affiliation":[]},{"given":"B. W.","family":"Allen","sequence":"additional","affiliation":[]},{"given":"P. P.","family":"Allport","sequence":"additional","affiliation":[]},{"given":"A.","family":"Aloisio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Alonso","sequence":"additional","affiliation":[]},{"given":"C.","family":"Alpigiani","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Alshehri","sequence":"additional","affiliation":[]},{"given":"E.","family":"Alunno Camelia","sequence":"additional","affiliation":[]},{"given":"M.","family":"Alvarez Estevez","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Alviggi","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Amaral Coutinho","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ambler","sequence":"additional","affiliation":[]},{"given":"L.","family":"Ambroz","sequence":"additional","affiliation":[]},{"given":"C.","family":"Amelung","sequence":"additional","affiliation":[]},{"given":"D.","family":"Amidei","sequence":"additional","affiliation":[]},{"given":"S. P. Amor","family":"Dos Santos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Amoroso","sequence":"additional","affiliation":[]},{"given":"C. S.","family":"Amrouche","sequence":"additional","affiliation":[]},{"given":"F.","family":"An","sequence":"additional","affiliation":[]},{"given":"C.","family":"Anastopoulos","sequence":"additional","affiliation":[]},{"given":"N.","family":"Andari","sequence":"additional","affiliation":[]},{"given":"T.","family":"Andeen","sequence":"additional","affiliation":[]},{"given":"C. F.","family":"Anders","sequence":"additional","affiliation":[]},{"given":"J. K.","family":"Anders","sequence":"additional","affiliation":[]},{"given":"A.","family":"Andreazza","sequence":"additional","affiliation":[]},{"given":"V.","family":"Andrei","sequence":"additional","affiliation":[]},{"given":"C. R.","family":"Anelli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Angelidakis","sequence":"additional","affiliation":[]},{"given":"A.","family":"Angerami","sequence":"additional","affiliation":[]},{"given":"A. V.","family":"Anisenkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Annovi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Antel","sequence":"additional","affiliation":[]},{"given":"M. T.","family":"Anthony","sequence":"additional","affiliation":[]},{"given":"E.","family":"Antipov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Antonelli","sequence":"additional","affiliation":[]},{"given":"D. J. 
A.","family":"Antrim","sequence":"additional","affiliation":[]},{"given":"F.","family":"Anulli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aoki","sequence":"additional","affiliation":[]},{"given":"J. A. Aparisi","family":"Pozo","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Aparo","sequence":"additional","affiliation":[]},{"given":"L. Aperio","family":"Bella","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Araque","sequence":"additional","affiliation":[]},{"given":"V. Araujo","family":"Ferraz","sequence":"additional","affiliation":[]},{"given":"R. Araujo","family":"Pereira","sequence":"additional","affiliation":[]},{"given":"C.","family":"Arcangeletti","sequence":"additional","affiliation":[]},{"given":"A. T. H.","family":"Arce","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Arduh","sequence":"additional","affiliation":[]},{"given":"J-F.","family":"Arguin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Argyropoulos","sequence":"additional","affiliation":[]},{"given":"J.-H.","family":"Arling","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Armbruster","sequence":"additional","affiliation":[]},{"given":"A.","family":"Armstrong","sequence":"additional","affiliation":[]},{"given":"O.","family":"Arnaez","sequence":"additional","affiliation":[]},{"given":"H.","family":"Arnold","sequence":"additional","affiliation":[]},{"given":"Z. P. Arrubarrena","family":"Tame","sequence":"additional","affiliation":[]},{"given":"G.","family":"Artoni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Artz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Asai","sequence":"additional","affiliation":[]},{"given":"T.","family":"Asawatavonvanich","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Asbah","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Asimakopoulou","sequence":"additional","affiliation":[]},{"given":"L.","family":"Asquith","sequence":"additional","affiliation":[]},{"given":"J.","family":"Assahsah","sequence":"additional","affiliation":[]},{"given":"K.","family":"Assamagan","sequence":"additional","affiliation":[]},{"given":"R.","family":"Astalos","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Atkin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Atkinson","sequence":"additional","affiliation":[]},{"given":"N. B.","family":"Atlay","sequence":"additional","affiliation":[]},{"given":"H.","family":"Atmani","sequence":"additional","affiliation":[]},{"given":"K.","family":"Augsten","sequence":"additional","affiliation":[]},{"given":"G.","family":"Avolio","sequence":"additional","affiliation":[]},{"given":"M. K.","family":"Ayoub","sequence":"additional","affiliation":[]},{"given":"G.","family":"Azuelos","sequence":"additional","affiliation":[]},{"given":"H.","family":"Bachacou","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bachas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Backes","sequence":"additional","affiliation":[]},{"given":"F.","family":"Backman","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bagnaia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Bahrasemani","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bailey","sequence":"additional","affiliation":[]},{"given":"V. R.","family":"Bailey","sequence":"additional","affiliation":[]},{"given":"J. 
T.","family":"Baines","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bakalis","sequence":"additional","affiliation":[]},{"given":"O. K.","family":"Baker","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Bakker","sequence":"additional","affiliation":[]},{"given":"D. Bakshi","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"S.","family":"Balaji","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Baldin","sequence":"additional","affiliation":[]},{"given":"P.","family":"Balek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Balli","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Balunas","sequence":"additional","affiliation":[]},{"given":"J.","family":"Balz","sequence":"additional","affiliation":[]},{"given":"E.","family":"Banas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bandyopadhyay","sequence":"additional","affiliation":[]},{"given":"Sw.","family":"Banerjee","sequence":"additional","affiliation":[]},{"given":"A. A. E.","family":"Bannoura","sequence":"additional","affiliation":[]},{"given":"L.","family":"Barak","sequence":"additional","affiliation":[]},{"given":"W. M.","family":"Barbe","sequence":"additional","affiliation":[]},{"given":"E. L.","family":"Barberio","sequence":"additional","affiliation":[]},{"given":"D.","family":"Barberis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Barbero","sequence":"additional","affiliation":[]},{"given":"G.","family":"Barbour","sequence":"additional","affiliation":[]},{"given":"T.","family":"Barillari","sequence":"additional","affiliation":[]},{"given":"M-S.","family":"Barisits","sequence":"additional","affiliation":[]},{"given":"J.","family":"Barkeloo","sequence":"additional","affiliation":[]},{"given":"T.","family":"Barklow","sequence":"additional","affiliation":[]},{"given":"R.","family":"Barnea","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Barnett","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Barnett","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Barnovska-Blenessy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Baroncelli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Barone","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Barr","sequence":"additional","affiliation":[]},{"given":"L.","family":"Barranco Navarro","sequence":"additional","affiliation":[]},{"given":"F.","family":"Barreiro","sequence":"additional","affiliation":[]},{"given":"J.","family":"Barreiro Guimar\u00e3es da Costa","sequence":"additional","affiliation":[]},{"given":"S.","family":"Barsov","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bartoldus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bartolini","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Barton","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bartos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Basalaev","sequence":"additional","affiliation":[]},{"given":"A.","family":"Basan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bassalat","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Basso","sequence":"additional","affiliation":[]},{"given":"R. L.","family":"Bates","sequence":"additional","affiliation":[]},{"given":"S.","family":"Batlamous","sequence":"additional","affiliation":[]},{"given":"J. 
R.","family":"Batley","sequence":"additional","affiliation":[]},{"given":"B.","family":"Batool","sequence":"additional","affiliation":[]},{"given":"M.","family":"Battaglia","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bauce","sequence":"additional","affiliation":[]},{"given":"F.","family":"Bauer","sequence":"additional","affiliation":[]},{"given":"K. T.","family":"Bauer","sequence":"additional","affiliation":[]},{"given":"H. S.","family":"Bawa","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Beacham","sequence":"additional","affiliation":[]},{"given":"T.","family":"Beau","sequence":"additional","affiliation":[]},{"given":"P. H.","family":"Beauchemin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Becherer","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bechtle","sequence":"additional","affiliation":[]},{"given":"H. C.","family":"Beck","sequence":"additional","affiliation":[]},{"given":"H. P.","family":"Beck","sequence":"additional","affiliation":[]},{"given":"K.","family":"Becker","sequence":"additional","affiliation":[]},{"given":"C.","family":"Becot","sequence":"additional","affiliation":[]},{"given":"A.","family":"Beddall","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Beddall","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Bednyakov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bedognetti","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Bee","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Beermann","sequence":"additional","affiliation":[]},{"given":"M.","family":"Begalli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Begel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Behera","sequence":"additional","affiliation":[]},{"given":"J. K.","family":"Behr","sequence":"additional","affiliation":[]},{"given":"F.","family":"Beisiegel","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Bell","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bella","sequence":"additional","affiliation":[]},{"given":"L.","family":"Bellagamba","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bellerive","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bellos","sequence":"additional","affiliation":[]},{"given":"K.","family":"Beloborodov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Belotskiy","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Belyaev","sequence":"additional","affiliation":[]},{"given":"D.","family":"Benchekroun","sequence":"additional","affiliation":[]},{"given":"N.","family":"Benekos","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Benhammou","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Benjamin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Benoit","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Bensinger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bentvelsen","sequence":"additional","affiliation":[]},{"given":"L.","family":"Beresford","sequence":"additional","affiliation":[]},{"given":"M.","family":"Beretta","sequence":"additional","affiliation":[]},{"given":"D.","family":"Berge","sequence":"additional","affiliation":[]},{"given":"E. 
Bergeaas","family":"Kuutmann","sequence":"additional","affiliation":[]},{"given":"N.","family":"Berger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Bergmann","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Bergsten","sequence":"additional","affiliation":[]},{"given":"J.","family":"Beringer","sequence":"additional","affiliation":[]},{"given":"S.","family":"Berlendis","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bernardi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bernius","sequence":"additional","affiliation":[]},{"given":"F. U.","family":"Bernlochner","sequence":"additional","affiliation":[]},{"given":"T.","family":"Berry","sequence":"additional","affiliation":[]},{"given":"P.","family":"Berta","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bertella","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Bertram","sequence":"additional","affiliation":[]},{"given":"O.","family":"Bessidskaia Bylund","sequence":"additional","affiliation":[]},{"given":"N.","family":"Besson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bethani","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bethke","sequence":"additional","affiliation":[]},{"given":"A.","family":"Betti","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bevan","sequence":"additional","affiliation":[]},{"given":"J.","family":"Beyer","sequence":"additional","affiliation":[]},{"given":"D. S.","family":"Bhattacharya","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bhattarai","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bi","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Bianchi","sequence":"additional","affiliation":[]},{"given":"O.","family":"Biebel","sequence":"additional","affiliation":[]},{"given":"D.","family":"Biedermann","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bielski","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bierwagen","sequence":"additional","affiliation":[]},{"given":"N. V.","family":"Biesuz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Biglietti","sequence":"additional","affiliation":[]},{"given":"T. R. V.","family":"Billoud","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bindi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bingul","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Biondi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Birman","sequence":"additional","affiliation":[]},{"given":"T.","family":"Bisanz","sequence":"additional","affiliation":[]},{"given":"J. 
P.","family":"Biswal","sequence":"additional","affiliation":[]},{"given":"D.","family":"Biswas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bitadze","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bittrich","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bj\u00f8rke","sequence":"additional","affiliation":[]},{"given":"T.","family":"Blazek","sequence":"additional","affiliation":[]},{"given":"I.","family":"Bloch","sequence":"additional","affiliation":[]},{"given":"C.","family":"Blocker","sequence":"additional","affiliation":[]},{"given":"A.","family":"Blue","sequence":"additional","affiliation":[]},{"given":"U.","family":"Blumenschein","sequence":"additional","affiliation":[]},{"given":"G. J.","family":"Bobbink","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Bobrovnikov","sequence":"additional","affiliation":[]},{"given":"S. S.","family":"Bocchetta","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bocci","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bogavac","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Bogdanchikov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bohm","sequence":"additional","affiliation":[]},{"given":"V.","family":"Boisvert","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bokan","sequence":"additional","affiliation":[]},{"given":"T.","family":"Bold","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Bolz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bomben","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bona","sequence":"additional","affiliation":[]},{"given":"J. S.","family":"Bonilla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Boonekamp","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Booth","sequence":"additional","affiliation":[]},{"given":"H. M.","family":"Borecka-Bielska","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Borgna","sequence":"additional","affiliation":[]},{"given":"A.","family":"Borisov","sequence":"additional","affiliation":[]},{"given":"G.","family":"Borissov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Bortfeldt","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bortoletto","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boscherini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bosman","sequence":"additional","affiliation":[]},{"given":"J. D. Bossio","family":"Sola","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bouaouda","sequence":"additional","affiliation":[]},{"given":"J.","family":"Boudreau","sequence":"additional","affiliation":[]},{"given":"E. V.","family":"Bouhova-Thacker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boumediene","sequence":"additional","affiliation":[]},{"given":"S. K.","family":"Boutle","sequence":"additional","affiliation":[]},{"given":"A.","family":"Boveia","sequence":"additional","affiliation":[]},{"given":"J.","family":"Boyd","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boye","sequence":"additional","affiliation":[]},{"given":"I. R.","family":"Boyko","sequence":"additional","affiliation":[]},{"given":"A. 
J.","family":"Bozson","sequence":"additional","affiliation":[]},{"given":"J.","family":"Bracinik","sequence":"additional","affiliation":[]},{"given":"N.","family":"Brahimi","sequence":"additional","affiliation":[]},{"given":"G.","family":"Brandt","sequence":"additional","affiliation":[]},{"given":"O.","family":"Brandt","sequence":"additional","affiliation":[]},{"given":"F.","family":"Braren","sequence":"additional","affiliation":[]},{"given":"B.","family":"Brau","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Brau","sequence":"additional","affiliation":[]},{"given":"W. D. Breaden","family":"Madden","sequence":"additional","affiliation":[]},{"given":"K.","family":"Brendlinger","sequence":"additional","affiliation":[]},{"given":"L.","family":"Brenner","sequence":"additional","affiliation":[]},{"given":"R.","family":"Brenner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bressler","sequence":"additional","affiliation":[]},{"given":"B.","family":"Brickwedde","sequence":"additional","affiliation":[]},{"given":"D. L.","family":"Briglin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Britton","sequence":"additional","affiliation":[]},{"given":"D.","family":"Britzger","sequence":"additional","affiliation":[]},{"given":"I.","family":"Brock","sequence":"additional","affiliation":[]},{"given":"R.","family":"Brock","sequence":"additional","affiliation":[]},{"given":"G.","family":"Brooijmans","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Brooks","sequence":"additional","affiliation":[]},{"given":"E.","family":"Brost","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Broughton","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Bruckman de Renstrom","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bruncko","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bruno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bruschi","sequence":"additional","affiliation":[]},{"given":"N.","family":"Bruscino","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bryant","sequence":"additional","affiliation":[]},{"given":"L.","family":"Bryngemark","sequence":"additional","affiliation":[]},{"given":"T.","family":"Buanes","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Buat","sequence":"additional","affiliation":[]},{"given":"P.","family":"Buchholz","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Buckley","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Budagov","sequence":"additional","affiliation":[]},{"given":"M. K.","family":"Bugge","sequence":"additional","affiliation":[]},{"given":"F.","family":"B\u00fchrer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Bulekov","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Burch","sequence":"additional","affiliation":[]},{"given":"S.","family":"Burdin","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Burgard","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Burger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Burghgrave","sequence":"additional","affiliation":[]},{"given":"J. T. 
P.","family":"Burr","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Burton","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Burzynski","sequence":"additional","affiliation":[]},{"given":"V.","family":"B\u00fcscher","sequence":"additional","affiliation":[]},{"given":"E.","family":"Buschmann","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Bussey","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Butler","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Buttar","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Butterworth","sequence":"additional","affiliation":[]},{"given":"P.","family":"Butti","sequence":"additional","affiliation":[]},{"given":"W.","family":"Buttinger","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Buxo Vazquez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Buzatu","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Buzykaev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Cabras","sequence":"additional","affiliation":[]},{"given":"S.","family":"Cabrera Urb\u00e1n","sequence":"additional","affiliation":[]},{"given":"D.","family":"Caforio","sequence":"additional","affiliation":[]},{"given":"H.","family":"Cai","sequence":"additional","affiliation":[]},{"given":"V. M. M.","family":"Cairo","sequence":"additional","affiliation":[]},{"given":"O.","family":"Cakir","sequence":"additional","affiliation":[]},{"given":"N.","family":"Calace","sequence":"additional","affiliation":[]},{"given":"P.","family":"Calafiura","sequence":"additional","affiliation":[]},{"given":"A.","family":"Calandri","sequence":"additional","affiliation":[]},{"given":"G.","family":"Calderini","sequence":"additional","affiliation":[]},{"given":"P.","family":"Calfayan","sequence":"additional","affiliation":[]},{"given":"G.","family":"Callea","sequence":"additional","affiliation":[]},{"given":"L. P.","family":"Caloba","sequence":"additional","affiliation":[]},{"given":"A.","family":"Caltabiano","sequence":"additional","affiliation":[]},{"given":"S.","family":"Calvente Lopez","sequence":"additional","affiliation":[]},{"given":"D.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"S.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"T. P.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"M.","family":"Calvetti","sequence":"additional","affiliation":[]},{"given":"R.","family":"Camacho Toro","sequence":"additional","affiliation":[]},{"given":"S.","family":"Camarda","sequence":"additional","affiliation":[]},{"given":"D.","family":"Camarero Munoz","sequence":"additional","affiliation":[]},{"given":"P.","family":"Camarri","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cameron","sequence":"additional","affiliation":[]},{"given":"C.","family":"Camincher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Campana","sequence":"additional","affiliation":[]},{"given":"M.","family":"Campanelli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Camplani","sequence":"additional","affiliation":[]},{"given":"A.","family":"Campoverde","sequence":"additional","affiliation":[]},{"given":"V.","family":"Canale","sequence":"additional","affiliation":[]},{"given":"A.","family":"Canesse","sequence":"additional","affiliation":[]},{"given":"M. 
Cano","family":"Bret","sequence":"additional","affiliation":[]},{"given":"J.","family":"Cantero","sequence":"additional","affiliation":[]},{"given":"T.","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Cao","sequence":"additional","affiliation":[]},{"given":"M. D. M.","family":"Capeans Garrido","sequence":"additional","affiliation":[]},{"given":"M.","family":"Capua","sequence":"additional","affiliation":[]},{"given":"R.","family":"Cardarelli","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cardillo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Carducci","sequence":"additional","affiliation":[]},{"given":"I.","family":"Carli","sequence":"additional","affiliation":[]},{"given":"T.","family":"Carli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Carlino","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"Carlson","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Carlson","sequence":"additional","affiliation":[]},{"given":"L.","family":"Carminati","sequence":"additional","affiliation":[]},{"given":"R. M. D.","family":"Carney","sequence":"additional","affiliation":[]},{"given":"S.","family":"Caron","sequence":"additional","affiliation":[]},{"given":"E.","family":"Carquin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Carr\u00e1","sequence":"additional","affiliation":[]},{"given":"J. W. S.","family":"Carter","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Casado","sequence":"additional","affiliation":[]},{"given":"A. F.","family":"Casha","sequence":"additional","affiliation":[]},{"given":"R.","family":"Castelijn","sequence":"additional","affiliation":[]},{"given":"F. L.","family":"Castillo","sequence":"additional","affiliation":[]},{"given":"L.","family":"Castillo Garcia","sequence":"additional","affiliation":[]},{"given":"V.","family":"Castillo Gimenez","sequence":"additional","affiliation":[]},{"given":"N. F.","family":"Castro","sequence":"additional","affiliation":[]},{"given":"A.","family":"Catinaccio","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Catmore","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cattai","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cavaliere","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cavallaro","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cavalli-Sforza","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cavasinni","sequence":"additional","affiliation":[]},{"given":"E.","family":"Celebi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Cerda Alberich","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cerny","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Cerqueira","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cerri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Cerrito","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cerutti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cervelli","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Cetin","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Chadi","sequence":"additional","affiliation":[]},{"given":"D.","family":"Chakraborty","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"W. 
S.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"W. Y.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Chapman","sequence":"additional","affiliation":[]},{"given":"B.","family":"Chargeishvili","sequence":"additional","affiliation":[]},{"given":"D. G.","family":"Charlton","sequence":"additional","affiliation":[]},{"given":"T. P.","family":"Charman","sequence":"additional","affiliation":[]},{"given":"C. C.","family":"Chau","sequence":"additional","affiliation":[]},{"given":"S.","family":"Che","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chekanov","sequence":"additional","affiliation":[]},{"given":"S. V.","family":"Chekulaev","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Chelkov","sequence":"additional","affiliation":[]},{"given":"B.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"C. H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"X.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Y-H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"H. C.","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cheplakov","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cheremushkina","sequence":"additional","affiliation":[]},{"given":"R.","family":"Cherkaoui El Moursli","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cheu","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cheung","sequence":"additional","affiliation":[]},{"given":"T. J. A.","family":"Cheval\u00e9rias","sequence":"additional","affiliation":[]},{"given":"L.","family":"Chevalier","sequence":"additional","affiliation":[]},{"given":"V.","family":"Chiarella","sequence":"additional","affiliation":[]},{"given":"G.","family":"Chiarelli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Chiodini","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Chisholm","sequence":"additional","affiliation":[]},{"given":"A.","family":"Chitan","sequence":"additional","affiliation":[]},{"given":"I.","family":"Chiu","sequence":"additional","affiliation":[]},{"given":"Y. H.","family":"Chiu","sequence":"additional","affiliation":[]},{"given":"M. V.","family":"Chizhov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Choi","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Chomont","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chouridou","sequence":"additional","affiliation":[]},{"given":"E. Y. S.","family":"Chow","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Chu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Chu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chudoba","sequence":"additional","affiliation":[]},{"given":"J. 
J.","family":"Chwastowski","sequence":"additional","affiliation":[]},{"given":"L.","family":"Chytka","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cieri","sequence":"additional","affiliation":[]},{"given":"K. M.","family":"Ciesla","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cinca","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cindro","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Cioar\u0103","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ciocio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cirotto","sequence":"additional","affiliation":[]},{"given":"Z. H.","family":"Citron","sequence":"additional","affiliation":[]},{"given":"M.","family":"Citterio","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Ciubotaru","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Ciungu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"C.","family":"Clement","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Coadou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cobal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Coccaro","sequence":"additional","affiliation":[]},{"given":"J.","family":"Cochran","sequence":"additional","affiliation":[]},{"given":"R.","family":"Coelho Lopes De Sa","sequence":"additional","affiliation":[]},{"given":"H.","family":"Cohen","sequence":"additional","affiliation":[]},{"given":"A. E. C.","family":"Coimbra","sequence":"additional","affiliation":[]},{"given":"B.","family":"Cole","sequence":"additional","affiliation":[]},{"given":"A. P.","family":"Colijn","sequence":"additional","affiliation":[]},{"given":"J.","family":"Collot","sequence":"additional","affiliation":[]},{"given":"P. Conde","family":"Mui\u00f1o","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Connell","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Connelly","sequence":"additional","affiliation":[]},{"given":"S.","family":"Constantinescu","sequence":"additional","affiliation":[]},{"given":"F.","family":"Conventi","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Cooper-Sarkar","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cormier","sequence":"additional","affiliation":[]},{"given":"K. J. R.","family":"Cormier","sequence":"additional","affiliation":[]},{"given":"L. D.","family":"Corpe","sequence":"additional","affiliation":[]},{"given":"M.","family":"Corradi","sequence":"additional","affiliation":[]},{"given":"E. E.","family":"Corrigan","sequence":"additional","affiliation":[]},{"given":"F.","family":"Corriveau","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Costa","sequence":"additional","affiliation":[]},{"given":"F.","family":"Costanza","sequence":"additional","affiliation":[]},{"given":"D.","family":"Costanzo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Cowan","sequence":"additional","affiliation":[]},{"given":"J. W.","family":"Cowley","sequence":"additional","affiliation":[]},{"given":"J.","family":"Crane","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cranmer","sequence":"additional","affiliation":[]},{"given":"S. 
J.","family":"Crawley","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Creager","sequence":"additional","affiliation":[]},{"given":"S.","family":"Cr\u00e9p\u00e9-Renaudin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Crescioli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cristinziani","sequence":"additional","affiliation":[]},{"given":"V.","family":"Croft","sequence":"additional","affiliation":[]},{"given":"G.","family":"Crosetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cueto","sequence":"additional","affiliation":[]},{"given":"T.","family":"Cuhadar Donszelmann","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Cukierman","sequence":"additional","affiliation":[]},{"given":"W. R.","family":"Cunningham","sequence":"additional","affiliation":[]},{"given":"S.","family":"Czekierda","sequence":"additional","affiliation":[]},{"given":"P.","family":"Czodrowski","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Da Cunha Sargedas De Sousa","sequence":"additional","affiliation":[]},{"given":"J. V.","family":"Da Fonseca Pinto","sequence":"additional","affiliation":[]},{"given":"C.","family":"Da Via","sequence":"additional","affiliation":[]},{"given":"W.","family":"Dabrowski","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dachs","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dado","sequence":"additional","affiliation":[]},{"given":"S.","family":"Dahbi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dai","sequence":"additional","affiliation":[]},{"given":"C.","family":"Dallapiccola","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dam","sequence":"additional","affiliation":[]},{"given":"G.","family":"D\u2019amen","sequence":"additional","affiliation":[]},{"given":"V.","family":"D\u2019Amico","sequence":"additional","affiliation":[]},{"given":"J.","family":"Damp","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Dandoy","sequence":"additional","affiliation":[]},{"given":"M. F.","family":"Daneri","sequence":"additional","affiliation":[]},{"given":"N. S.","family":"Dann","sequence":"additional","affiliation":[]},{"given":"M.","family":"Danninger","sequence":"additional","affiliation":[]},{"given":"V.","family":"Dao","sequence":"additional","affiliation":[]},{"given":"G.","family":"Darbo","sequence":"additional","affiliation":[]},{"given":"O.","family":"Dartsi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dattagupta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Daubney","sequence":"additional","affiliation":[]},{"given":"S.","family":"D\u2019Auria","sequence":"additional","affiliation":[]},{"given":"C.","family":"David","sequence":"additional","affiliation":[]},{"given":"T.","family":"Davidek","sequence":"additional","affiliation":[]},{"given":"D. 
R.","family":"Davis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Dawson","sequence":"additional","affiliation":[]},{"given":"K.","family":"De","sequence":"additional","affiliation":[]},{"given":"R.","family":"De Asmundis","sequence":"additional","affiliation":[]},{"given":"M.","family":"De Beurs","sequence":"additional","affiliation":[]},{"given":"S.","family":"De Castro","sequence":"additional","affiliation":[]},{"given":"S.","family":"De Cecco","sequence":"additional","affiliation":[]},{"given":"N.","family":"De Groot","sequence":"additional","affiliation":[]},{"given":"P.","family":"de Jong","sequence":"additional","affiliation":[]},{"given":"H.","family":"De la Torre","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Maria","sequence":"additional","affiliation":[]},{"given":"D.","family":"De Pedis","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Salvo","sequence":"additional","affiliation":[]},{"given":"U.","family":"De Sanctis","sequence":"additional","affiliation":[]},{"given":"M.","family":"De Santis","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Santo","sequence":"additional","affiliation":[]},{"given":"K.","family":"De Vasconcelos Corga","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"De Vivie De Regie","sequence":"additional","affiliation":[]},{"given":"C.","family":"Debenedetti","sequence":"additional","affiliation":[]},{"given":"D. V.","family":"Dedovich","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Deiana","sequence":"additional","affiliation":[]},{"given":"J.","family":"Del Peso","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Delabat Diaz","sequence":"additional","affiliation":[]},{"given":"D.","family":"Delgove","sequence":"additional","affiliation":[]},{"given":"F.","family":"Deliot","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Delitzsch","sequence":"additional","affiliation":[]},{"given":"M.","family":"Della Pietra","sequence":"additional","affiliation":[]},{"given":"D.","family":"Della Volpe","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dell\u2019Acqua","sequence":"additional","affiliation":[]},{"given":"L.","family":"Dell\u2019Asta","sequence":"additional","affiliation":[]},{"given":"M.","family":"Delmastro","sequence":"additional","affiliation":[]},{"given":"C.","family":"Delporte","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Delsart","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"DeMarco","sequence":"additional","affiliation":[]},{"given":"S.","family":"Demers","sequence":"additional","affiliation":[]},{"given":"M.","family":"Demichev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Demontigny","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Denisov","sequence":"additional","affiliation":[]},{"given":"L.","family":"D\u2019Eramo","sequence":"additional","affiliation":[]},{"given":"D.","family":"Derendarz","sequence":"additional","affiliation":[]},{"given":"J. 
E.","family":"Derkaoui","sequence":"additional","affiliation":[]},{"given":"F.","family":"Derue","sequence":"additional","affiliation":[]},{"given":"P.","family":"Dervan","sequence":"additional","affiliation":[]},{"given":"K.","family":"Desch","sequence":"additional","affiliation":[]},{"given":"C.","family":"Deterre","sequence":"additional","affiliation":[]},{"given":"K.","family":"Dette","sequence":"additional","affiliation":[]},{"given":"C.","family":"Deutsch","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Devesa","sequence":"additional","affiliation":[]},{"given":"P. O.","family":"Deviveiros","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Di Bello","sequence":"additional","affiliation":[]},{"given":"A.","family":"Di Ciaccio","sequence":"additional","affiliation":[]},{"given":"L.","family":"Di Ciaccio","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Di Clemente","sequence":"additional","affiliation":[]},{"given":"C.","family":"Di Donato","sequence":"additional","affiliation":[]},{"given":"A.","family":"Di Girolamo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Di Gregorio","sequence":"additional","affiliation":[]},{"given":"B.","family":"Di Micco","sequence":"additional","affiliation":[]},{"given":"R.","family":"Di Nardo","sequence":"additional","affiliation":[]},{"given":"K. F.","family":"Di Petrillo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Di Sipio","sequence":"additional","affiliation":[]},{"given":"C.","family":"Diaconu","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Dias","sequence":"additional","affiliation":[]},{"given":"T. Dias","family":"Do Vale","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Diaz","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dickinson","sequence":"additional","affiliation":[]},{"given":"E. B.","family":"Diehl","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dietrich","sequence":"additional","affiliation":[]},{"given":"S.","family":"D\u00edez Cornell","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dimitrievska","sequence":"additional","affiliation":[]},{"given":"W.","family":"Ding","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dingfelder","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dittus","sequence":"additional","affiliation":[]},{"given":"F.","family":"Djama","sequence":"additional","affiliation":[]},{"given":"T.","family":"Djobava","sequence":"additional","affiliation":[]},{"given":"J. I.","family":"Djuvsland","sequence":"additional","affiliation":[]},{"given":"M. A. 
B.","family":"Do Vale","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dobre","sequence":"additional","affiliation":[]},{"given":"D.","family":"Dodsworth","sequence":"additional","affiliation":[]},{"given":"C.","family":"Doglioni","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dolejsi","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Dolezal","sequence":"additional","affiliation":[]},{"given":"M.","family":"Donadelli","sequence":"additional","affiliation":[]},{"given":"B.","family":"Dong","sequence":"additional","affiliation":[]},{"given":"J.","family":"Donini","sequence":"additional","affiliation":[]},{"given":"A.","family":"D\u2019onofrio","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u2019Onofrio","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dopke","sequence":"additional","affiliation":[]},{"given":"A.","family":"Doria","sequence":"additional","affiliation":[]},{"given":"M. T.","family":"Dova","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Doyle","sequence":"additional","affiliation":[]},{"given":"E.","family":"Drechsler","sequence":"additional","affiliation":[]},{"given":"E.","family":"Dreyer","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dreyer","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Drobac","sequence":"additional","affiliation":[]},{"given":"D.","family":"Du","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Duan","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dubinin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dubovsky","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dubreuil","sequence":"additional","affiliation":[]},{"given":"E.","family":"Duchovni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Duckeck","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ducourthial","sequence":"additional","affiliation":[]},{"given":"O. A.","family":"Ducu","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duda","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dudarev","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Dudder","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Duffield","sequence":"additional","affiliation":[]},{"given":"L.","family":"Duflot","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u00fchrssen","sequence":"additional","affiliation":[]},{"given":"C.","family":"D\u00fclsen","sequence":"additional","affiliation":[]},{"given":"lsen M.","family":"Dumancic","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Dumitriu","sequence":"additional","affiliation":[]},{"given":"A. K.","family":"Duncan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dunford","sequence":"additional","affiliation":[]},{"given":"A.","family":"Duperrin","sequence":"additional","affiliation":[]},{"given":"H. Duran","family":"Yildiz","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u00fcren","sequence":"additional","affiliation":[]},{"given":"A.","family":"Durglishvili","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duschinger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Dutta","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duvnjak","sequence":"additional","affiliation":[]},{"given":"B. 
L.","family":"Dwyer","sequence":"additional","affiliation":[]},{"given":"G. I.","family":"Dyckes","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dyndal","sequence":"additional","affiliation":[]},{"given":"S.","family":"Dysch","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Dziedzic","sequence":"additional","affiliation":[]},{"given":"K. M.","family":"Ecker","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Eggleston","sequence":"additional","affiliation":[]},{"given":"T.","family":"Eifert","sequence":"additional","affiliation":[]},{"given":"G.","family":"Eigen","sequence":"additional","affiliation":[]},{"given":"K.","family":"Einsweiler","sequence":"additional","affiliation":[]},{"given":"T.","family":"Ekelof","sequence":"additional","affiliation":[]},{"given":"H.","family":"El Jarrari","sequence":"additional","affiliation":[]},{"given":"R.","family":"El Kosseifi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Ellajosyula","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ellert","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ellinghaus","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Elliot","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ellis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Elmsheuser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Elsing","sequence":"additional","affiliation":[]},{"given":"D.","family":"Emeliyanov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Emerman","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Enari","sequence":"additional","affiliation":[]},{"given":"M. B.","family":"Epland","sequence":"additional","affiliation":[]},{"given":"J.","family":"Erdmann","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ereditato","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Erland","sequence":"additional","affiliation":[]},{"given":"M.","family":"Errenst","sequence":"additional","affiliation":[]},{"given":"M.","family":"Escalier","sequence":"additional","affiliation":[]},{"given":"C.","family":"Escobar","sequence":"additional","affiliation":[]},{"given":"O.","family":"Estrada Pastor","sequence":"additional","affiliation":[]},{"given":"E.","family":"Etzion","sequence":"additional","affiliation":[]},{"given":"H.","family":"Evans","sequence":"additional","affiliation":[]},{"given":"M. O.","family":"Evans","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ezhilov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fabbri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fabbri","sequence":"additional","affiliation":[]},{"given":"V.","family":"Fabiani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Facini","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Faisca Rodrigues Pereira","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Fakhrutdinov","sequence":"additional","affiliation":[]},{"given":"S.","family":"Falciano","sequence":"additional","affiliation":[]},{"given":"P. 
J.","family":"Falke","sequence":"additional","affiliation":[]},{"given":"S.","family":"Falke","sequence":"additional","affiliation":[]},{"given":"J.","family":"Faltova","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Fang","sequence":"additional","affiliation":[]},{"given":"G.","family":"Fanourakis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Fanti","sequence":"additional","affiliation":[]},{"given":"M.","family":"Faraj","sequence":"additional","affiliation":[]},{"given":"A.","family":"Farbin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Farilla","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Farina","sequence":"additional","affiliation":[]},{"given":"T.","family":"Farooque","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Farrington","sequence":"additional","affiliation":[]},{"given":"P.","family":"Farthouat","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fassi","sequence":"additional","affiliation":[]},{"given":"P.","family":"Fassnacht","sequence":"additional","affiliation":[]},{"given":"D.","family":"Fassouliotis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Faucci Giannelli","sequence":"additional","affiliation":[]},{"given":"W. J.","family":"Fawcett","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fayard","sequence":"additional","affiliation":[]},{"given":"O. L.","family":"Fedin","sequence":"additional","affiliation":[]},{"given":"W.","family":"Fedorko","sequence":"additional","affiliation":[]},{"given":"M.","family":"Feickert","sequence":"additional","affiliation":[]},{"given":"L.","family":"Feligioni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Fell","sequence":"additional","affiliation":[]},{"given":"C.","family":"Feng","sequence":"additional","affiliation":[]},{"given":"M.","family":"Feng","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Fenton","sequence":"additional","affiliation":[]},{"given":"A. B.","family":"Fenyuk","sequence":"additional","affiliation":[]},{"given":"S. W.","family":"Ferguson","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ferrando","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrante","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"P.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"D. E.","family":"Ferreira de Lima","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrer","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ferrere","sequence":"additional","affiliation":[]},{"given":"C.","family":"Ferretti","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fiedler","sequence":"additional","affiliation":[]},{"given":"A.","family":"Filip\u010di\u010d","sequence":"additional","affiliation":[]},{"given":"F.","family":"Filthaut","sequence":"additional","affiliation":[]},{"given":"K. D.","family":"Finelli","sequence":"additional","affiliation":[]},{"given":"M. C. N.","family":"Fiolhais","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fiorini","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fischer","sequence":"additional","affiliation":[]},{"given":"W. 
C.","family":"Fisher","sequence":"additional","affiliation":[]},{"given":"I.","family":"Fleck","sequence":"additional","affiliation":[]},{"given":"P.","family":"Fleischmann","sequence":"additional","affiliation":[]},{"given":"T.","family":"Flick","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Flierl","sequence":"additional","affiliation":[]},{"given":"L.","family":"Flores","sequence":"additional","affiliation":[]},{"given":"L. R.","family":"Flores Castillo","sequence":"additional","affiliation":[]},{"given":"F. M.","family":"Follega","sequence":"additional","affiliation":[]},{"given":"N.","family":"Fomin","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Foo","sequence":"additional","affiliation":[]},{"given":"G. T.","family":"Forcolin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Formica","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"F\u00f6rster","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Forti","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Foster","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Foti","sequence":"additional","affiliation":[]},{"given":"D.","family":"Fournier","sequence":"additional","affiliation":[]},{"given":"H.","family":"Fox","sequence":"additional","affiliation":[]},{"given":"P.","family":"Francavilla","sequence":"additional","affiliation":[]},{"given":"S.","family":"Francescato","sequence":"additional","affiliation":[]},{"given":"M.","family":"Franchini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Franchino","sequence":"additional","affiliation":[]},{"given":"D.","family":"Francis","sequence":"additional","affiliation":[]},{"given":"L.","family":"Franconi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Franklin","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Fray","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Freeman","sequence":"additional","affiliation":[]},{"given":"B.","family":"Freund","sequence":"additional","affiliation":[]},{"given":"W. S.","family":"Freund","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Freundlich","sequence":"additional","affiliation":[]},{"given":"D. C.","family":"Frizzell","sequence":"additional","affiliation":[]},{"given":"D.","family":"Froidevaux","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Frost","sequence":"additional","affiliation":[]},{"given":"C.","family":"Fukunaga","sequence":"additional","affiliation":[]},{"given":"E.","family":"Fullana Torregrosa","sequence":"additional","affiliation":[]},{"given":"T.","family":"Fusayasu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Fuster","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gabrielli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gabrielli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gadatsch","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gadow","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gagliardi","sequence":"additional","affiliation":[]},{"given":"L. G.","family":"Gagnon","sequence":"additional","affiliation":[]},{"given":"B.","family":"Galhardo","sequence":"additional","affiliation":[]},{"given":"G. E.","family":"Gallardo","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Gallas","sequence":"additional","affiliation":[]},{"given":"B. 
J.","family":"Gallop","sequence":"additional","affiliation":[]},{"given":"G.","family":"Galster","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gamboa Goni","sequence":"additional","affiliation":[]},{"given":"K. K.","family":"Gan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ganguly","sequence":"additional","affiliation":[]},{"given":"J.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Y. S.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"C.","family":"Garc\u00eda","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Garc\u00eda Navarro","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Garc\u00eda Pascual","sequence":"additional","affiliation":[]},{"given":"C.","family":"Garcia-Argos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Garcia-Sciveres","sequence":"additional","affiliation":[]},{"given":"R. W.","family":"Gardner","sequence":"additional","affiliation":[]},{"given":"N.","family":"Garelli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gargiulo","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Garner","sequence":"additional","affiliation":[]},{"given":"V.","family":"Garonne","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Gasiorowski","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gaspar","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gaudiello","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gaudio","sequence":"additional","affiliation":[]},{"given":"I. L.","family":"Gavrilenko","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gavrilyuk","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gay","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gaycken","sequence":"additional","affiliation":[]},{"given":"E. N.","family":"Gazis","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Geanta","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Gee","sequence":"additional","affiliation":[]},{"given":"C. N. P.","family":"Gee","sequence":"additional","affiliation":[]},{"given":"J.","family":"Geisen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Geisen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gemme","sequence":"additional","affiliation":[]},{"given":"M. H.","family":"Genest","sequence":"additional","affiliation":[]},{"given":"C.","family":"Geng","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gentile","sequence":"additional","affiliation":[]},{"given":"S.","family":"George","sequence":"additional","affiliation":[]},{"given":"T.","family":"Geralis","sequence":"additional","affiliation":[]},{"given":"L. 
O.","family":"Gerlach","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gessinger-Befurt","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gessner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ghasemi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ghasemi Bostanabad","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ghneimat","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ghosh","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ghosh","sequence":"additional","affiliation":[]},{"given":"B.","family":"Giacobbe","sequence":"additional","affiliation":[]},{"given":"S.","family":"Giagu","sequence":"additional","affiliation":[]},{"given":"N.","family":"Giangiacomi","sequence":"additional","affiliation":[]},{"given":"P.","family":"Giannetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Giannini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Giannini","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Gibson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Gignac","sequence":"additional","affiliation":[]},{"given":"D.","family":"Gillberg","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gilles","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Gingrich","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Giordani","sequence":"additional","affiliation":[]},{"given":"P. F.","family":"Giraud","sequence":"additional","affiliation":[]},{"given":"G.","family":"Giugliarelli","sequence":"additional","affiliation":[]},{"given":"D.","family":"Giugni","sequence":"additional","affiliation":[]},{"given":"F.","family":"Giuli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gkaitatzis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Gkialas","sequence":"additional","affiliation":[]},{"given":"E. L.","family":"Gkougkousis","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gkountoumis","sequence":"additional","affiliation":[]},{"given":"L. K.","family":"Gladilin","sequence":"additional","affiliation":[]},{"given":"C.","family":"Glasman","sequence":"additional","affiliation":[]},{"given":"J.","family":"Glatzer","sequence":"additional","affiliation":[]},{"given":"P. C. F.","family":"Glaysher","sequence":"additional","affiliation":[]},{"given":"A.","family":"Glazov","sequence":"additional","affiliation":[]},{"given":"G. 
R.","family":"Gledhill","sequence":"additional","affiliation":[]},{"given":"I.","family":"Gnesi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Goblirsch-Kolb","sequence":"additional","affiliation":[]},{"given":"D.","family":"Godin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Goldfarb","sequence":"additional","affiliation":[]},{"given":"T.","family":"Golling","sequence":"additional","affiliation":[]},{"given":"D.","family":"Golubkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gomes","sequence":"additional","affiliation":[]},{"given":"R.","family":"Goncalves Gama","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gon\u00e7alo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gonella","sequence":"additional","affiliation":[]},{"given":"L.","family":"Gonella","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gongadze","sequence":"additional","affiliation":[]},{"given":"F.","family":"Gonnella","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Gonski","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonz\u00e1lez de la Hoz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonzalez Fernandez","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonzalez-Sevilla","sequence":"additional","affiliation":[]},{"given":"G. R.","family":"Gonzalvo Rodriguez","sequence":"additional","affiliation":[]},{"given":"L.","family":"Goossens","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Gorasia","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Gorbounov","sequence":"additional","affiliation":[]},{"given":"H. A.","family":"Gordon","sequence":"additional","affiliation":[]},{"given":"B.","family":"Gorini","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gorini","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gori\u0161ek","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Goshaw","sequence":"additional","affiliation":[]},{"given":"M. I.","family":"Gostkin","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Gottardo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Gouighri","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Goussiou","sequence":"additional","affiliation":[]},{"given":"N.","family":"Govender","sequence":"additional","affiliation":[]},{"given":"C.","family":"Goy","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gozani","sequence":"additional","affiliation":[]},{"given":"I.","family":"Grabowska-Bold","sequence":"additional","affiliation":[]},{"given":"E. C.","family":"Graham","sequence":"additional","affiliation":[]},{"given":"J.","family":"Gramling","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gramstad","sequence":"additional","affiliation":[]},{"given":"S.","family":"Grancagnolo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Grandi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Gratchev","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Gravila","sequence":"additional","affiliation":[]},{"given":"F. G.","family":"Gravili","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gray","sequence":"additional","affiliation":[]},{"given":"H. 
M.","family":"Gray","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grefe","sequence":"additional","affiliation":[]},{"given":"K.","family":"Gregersen","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Gregor","sequence":"additional","affiliation":[]},{"given":"P.","family":"Grenier","sequence":"additional","affiliation":[]},{"given":"K.","family":"Grevtsov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grieco","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Grieser","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Grillo","sequence":"additional","affiliation":[]},{"given":"K.","family":"Grimm","sequence":"additional","affiliation":[]},{"given":"S.","family":"Grinstein","sequence":"additional","affiliation":[]},{"given":"J.-F.","family":"Grivaz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Groh","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gross","sequence":"additional","affiliation":[]},{"given":"J.","family":"Grosse-Knetter","sequence":"additional","affiliation":[]},{"given":"Z. J.","family":"Grout","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grud","sequence":"additional","affiliation":[]},{"given":"A.","family":"Grummer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Guan","sequence":"additional","affiliation":[]},{"given":"W.","family":"Guan","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gubbels","sequence":"additional","affiliation":[]},{"given":"J.","family":"Guenther","sequence":"additional","affiliation":[]},{"given":"A.","family":"Guerguichon","sequence":"additional","affiliation":[]},{"given":"J. G. R.","family":"Guerrero Rojas","sequence":"additional","affiliation":[]},{"given":"F.","family":"Guescini","sequence":"additional","affiliation":[]},{"given":"D.","family":"Guest","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gugel","sequence":"additional","affiliation":[]},{"given":"T.","family":"Guillemin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Guindon","sequence":"additional","affiliation":[]},{"given":"U.","family":"Gul","sequence":"additional","affiliation":[]},{"given":"J.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"W.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gurbuz","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gustavino","sequence":"additional","affiliation":[]},{"given":"M.","family":"Guth","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gutierrez","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gutschow","sequence":"additional","affiliation":[]},{"given":"C.","family":"Guyot","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gwenlan","sequence":"additional","affiliation":[]},{"given":"C. B.","family":"Gwilliam","sequence":"additional","affiliation":[]},{"given":"A.","family":"Haas","sequence":"additional","affiliation":[]},{"given":"C.","family":"Haber","sequence":"additional","affiliation":[]},{"given":"H. 
K.","family":"Hadavand","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hadef","sequence":"additional","affiliation":[]},{"given":"M.","family":"Haleem","sequence":"additional","affiliation":[]},{"given":"J.","family":"Haley","sequence":"additional","affiliation":[]},{"given":"G.","family":"Halladjian","sequence":"additional","affiliation":[]},{"given":"G. D.","family":"Hallewell","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hamacher","sequence":"additional","affiliation":[]},{"given":"P.","family":"Hamal","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hamano","sequence":"additional","affiliation":[]},{"given":"H.","family":"Hamdaoui","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hamer","sequence":"additional","affiliation":[]},{"given":"G. N.","family":"Hamity","sequence":"additional","affiliation":[]},{"given":"K.","family":"Han","sequence":"additional","affiliation":[]},{"given":"L.","family":"Han","sequence":"additional","affiliation":[]},{"given":"S.","family":"Han","sequence":"additional","affiliation":[]},{"given":"Y. F.","family":"Han","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hanagaki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hance","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Handl","sequence":"additional","affiliation":[]},{"given":"B.","family":"Haney","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hankache","sequence":"additional","affiliation":[]},{"given":"E.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"P. H.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"E. C.","family":"Hanson","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hara","sequence":"additional","affiliation":[]},{"given":"T.","family":"Harenberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Harkusha","sequence":"additional","affiliation":[]},{"given":"P. F.","family":"Harrison","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"Hartmann","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Hasegawa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hasib","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hassani","sequence":"additional","affiliation":[]},{"given":"S.","family":"Haug","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hauser","sequence":"additional","affiliation":[]},{"given":"L. B.","family":"Havener","sequence":"additional","affiliation":[]},{"given":"M.","family":"Havranek","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Hawkes","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Hawkings","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hayden","sequence":"additional","affiliation":[]},{"given":"C.","family":"Hayes","sequence":"additional","affiliation":[]},{"given":"R. L.","family":"Hayes","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Hays","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Hays","sequence":"additional","affiliation":[]},{"given":"H. S.","family":"Hayward","sequence":"additional","affiliation":[]},{"given":"S. 
J.","family":"Haywood","sequence":"additional","affiliation":[]},{"given":"F.","family":"He","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Heath","sequence":"additional","affiliation":[]},{"given":"V.","family":"Hedberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Heer","sequence":"additional","affiliation":[]},{"given":"K. K.","family":"Heidegger","sequence":"additional","affiliation":[]},{"given":"W. D.","family":"Heidorn","sequence":"additional","affiliation":[]},{"given":"J.","family":"Heilman","sequence":"additional","affiliation":[]},{"given":"S.","family":"Heim","sequence":"additional","affiliation":[]},{"given":"T.","family":"Heim","sequence":"additional","affiliation":[]},{"given":"B.","family":"Heinemann","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Heinrich","sequence":"additional","affiliation":[]},{"given":"L.","family":"Heinrich","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hejbal","sequence":"additional","affiliation":[]},{"given":"L.","family":"Helary","sequence":"additional","affiliation":[]},{"given":"A.","family":"Held","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hellesund","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Helling","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hellman","sequence":"additional","affiliation":[]},{"given":"C.","family":"Helsens","sequence":"additional","affiliation":[]},{"given":"R. C. W.","family":"Henderson","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Heng","sequence":"additional","affiliation":[]},{"given":"L.","family":"Henkelmann","sequence":"additional","affiliation":[]},{"given":"S.","family":"Henkelmann","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Henriques Correia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Herde","sequence":"additional","affiliation":[]},{"given":"V.","family":"Herget","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Hern\u00e1ndez Jim\u00e9nez","sequence":"additional","affiliation":[]},{"given":"H.","family":"Herr","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Herrmann","sequence":"additional","affiliation":[]},{"given":"T.","family":"Herrmann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Herten","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hertenberger","sequence":"additional","affiliation":[]},{"given":"L.","family":"Hervas","sequence":"additional","affiliation":[]},{"given":"T. C.","family":"Herwig","sequence":"additional","affiliation":[]},{"given":"G. G.","family":"Hesketh","sequence":"additional","affiliation":[]},{"given":"N. P.","family":"Hessey","sequence":"additional","affiliation":[]},{"given":"A.","family":"Higashida","sequence":"additional","affiliation":[]},{"given":"S.","family":"Higashino","sequence":"additional","affiliation":[]},{"given":"E.","family":"Hig\u00f3n-Rodriguez","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hildebrand","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Hill","sequence":"additional","affiliation":[]},{"given":"K. K.","family":"Hill","sequence":"additional","affiliation":[]},{"given":"K. H.","family":"Hiller","sequence":"additional","affiliation":[]},{"given":"S. 
J.","family":"Hillier","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hils","sequence":"additional","affiliation":[]},{"given":"I.","family":"Hinchliffe","sequence":"additional","affiliation":[]},{"given":"F.","family":"Hinterkeuser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hirose","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hirose","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hirschbuehl","sequence":"additional","affiliation":[]},{"given":"B.","family":"Hiti","sequence":"additional","affiliation":[]},{"given":"O.","family":"Hladik","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Hlaluku","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hobbs","sequence":"additional","affiliation":[]},{"given":"N.","family":"Hod","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Hodgkinson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hoecker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hohn","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hohov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Holm","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Holmes","sequence":"additional","affiliation":[]},{"given":"M.","family":"Holzbock","sequence":"additional","affiliation":[]},{"given":"L. B. A. H.","family":"Hommels","sequence":"additional","affiliation":[]},{"given":"S.","family":"Honda","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Hong","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Honig","sequence":"additional","affiliation":[]},{"given":"A.","family":"H\u00f6nle","sequence":"additional","affiliation":[]},{"given":"B. H.","family":"Hooberman","sequence":"additional","affiliation":[]},{"given":"W. H.","family":"Hopkins","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Horii","sequence":"additional","affiliation":[]},{"given":"P.","family":"Horn","sequence":"additional","affiliation":[]},{"given":"L. A.","family":"Horyn","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hoummada","sequence":"additional","affiliation":[]},{"given":"J.","family":"Howarth","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hoya","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hrabovsky","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hrdinka","sequence":"additional","affiliation":[]},{"given":"I.","family":"Hristova","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hrivnac","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hrynevich","sequence":"additional","affiliation":[]},{"given":"T.","family":"Hryn\u2019ova","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Hsu","sequence":"additional","affiliation":[]},{"given":"S.-C.","family":"Hsu","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Y. F.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"D. 
P.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Hubacek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Hubaut","sequence":"additional","affiliation":[]},{"given":"M.","family":"Huebner","sequence":"additional","affiliation":[]},{"given":"F.","family":"Huegging","sequence":"additional","affiliation":[]},{"given":"T. B.","family":"Huffman","sequence":"additional","affiliation":[]},{"given":"M.","family":"Huhtinen","sequence":"additional","affiliation":[]},{"given":"R. F. H.","family":"Hunter","sequence":"additional","affiliation":[]},{"given":"P.","family":"Huo","sequence":"additional","affiliation":[]},{"given":"N.","family":"Huseynov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Huston","sequence":"additional","affiliation":[]},{"given":"J.","family":"Huth","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hyneman","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hyrych","sequence":"additional","affiliation":[]},{"given":"G.","family":"Iacobucci","sequence":"additional","affiliation":[]},{"given":"G.","family":"Iakovidis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ibragimov","sequence":"additional","affiliation":[]},{"given":"L.","family":"Iconomidou-Fayard","sequence":"additional","affiliation":[]},{"given":"P.","family":"Iengo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ignazzi","sequence":"additional","affiliation":[]},{"given":"O.","family":"Igonkina","sequence":"additional","affiliation":[]},{"given":"R.","family":"Iguchi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Iizawa","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ikegami","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ikeno","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ilg","sequence":"additional","affiliation":[]},{"given":"D.","family":"Iliadis","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ilic","sequence":"additional","affiliation":[]},{"given":"F.","family":"Iltzsche","sequence":"additional","affiliation":[]},{"given":"G.","family":"Introzzi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Iodice","sequence":"additional","affiliation":[]},{"given":"K.","family":"Iordanidou","sequence":"additional","affiliation":[]},{"given":"V.","family":"Ippolito","sequence":"additional","affiliation":[]},{"given":"M. F.","family":"Isacson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ishino","sequence":"additional","affiliation":[]},{"given":"W.","family":"Islam","sequence":"additional","affiliation":[]},{"given":"C.","family":"Issever","sequence":"additional","affiliation":[]},{"given":"S.","family":"Istin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ito","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Iturbe Ponce","sequence":"additional","affiliation":[]},{"given":"R.","family":"Iuppa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ivina","sequence":"additional","affiliation":[]},{"given":"H.","family":"Iwasaki","sequence":"additional","affiliation":[]},{"given":"J. 
M.","family":"Izen","sequence":"additional","affiliation":[]},{"given":"V.","family":"Izzo","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jacka","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jackson","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Jacobs","sequence":"additional","affiliation":[]},{"given":"B. P.","family":"Jaeger","sequence":"additional","affiliation":[]},{"given":"V.","family":"Jain","sequence":"additional","affiliation":[]},{"given":"G.","family":"J\u00e4kel","sequence":"additional","affiliation":[]},{"given":"K. B.","family":"Jakobi","sequence":"additional","affiliation":[]},{"given":"K.","family":"Jakobs","sequence":"additional","affiliation":[]},{"given":"T.","family":"Jakoubek","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jamieson","sequence":"additional","affiliation":[]},{"given":"K. W.","family":"Janas","sequence":"additional","affiliation":[]},{"given":"R.","family":"Jansky","sequence":"additional","affiliation":[]},{"given":"M.","family":"Janus","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Janus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Jarlskog","sequence":"additional","affiliation":[]},{"given":"N.","family":"Javadov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Jav\u016frek","sequence":"additional","affiliation":[]},{"given":"M.","family":"Javurkova","sequence":"additional","affiliation":[]},{"given":"F.","family":"Jeanneau","sequence":"additional","affiliation":[]},{"given":"L.","family":"Jeanty","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jejelava","sequence":"additional","affiliation":[]},{"given":"A.","family":"Jelinskas","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jenni","sequence":"additional","affiliation":[]},{"given":"N.","family":"Jeong","sequence":"additional","affiliation":[]},{"given":"S.","family":"J\u00e9z\u00e9quel","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ji","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jiggins","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Jimenez Morales","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jimenez Pena","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Jinaru","sequence":"additional","affiliation":[]},{"given":"O.","family":"Jinnouchi","sequence":"additional","affiliation":[]},{"given":"H.","family":"Jivan","sequence":"additional","affiliation":[]},{"given":"P.","family":"Johansson","sequence":"additional","affiliation":[]},{"given":"K. A.","family":"Johns","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Johnson","sequence":"additional","affiliation":[]},{"given":"R. W. L.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jongmanns","sequence":"additional","affiliation":[]},{"given":"P. 
M.","family":"Jorge","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jovicevic","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ju","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Junggeburth","sequence":"additional","affiliation":[]},{"given":"A.","family":"Juste Rozas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kaczmarska","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kado","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kagan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kagan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kahn","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kahra","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kaji","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kajomovitz","sequence":"additional","affiliation":[]},{"given":"C. W.","family":"Kalderon","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kaluza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kamenshchikov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kaneda","sequence":"additional","affiliation":[]},{"given":"N. J.","family":"Kang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Kanjir","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Kano","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kanzaki","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Kaplan","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kar","sequence":"additional","affiliation":[]},{"given":"K.","family":"Karava","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Kareem","sequence":"additional","affiliation":[]},{"given":"S. N.","family":"Karpov","sequence":"additional","affiliation":[]},{"given":"Z. M.","family":"Karpova","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kartvelishvili","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Karyukhin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kastanas","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kato","sequence":"additional","affiliation":[]},{"given":"J.","family":"Katzy","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kawade","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kawagoe","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kawaguchi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kawamoto","sequence":"additional","affiliation":[]},{"given":"G.","family":"Kawamura","sequence":"additional","affiliation":[]},{"given":"E. F.","family":"Kay","sequence":"additional","affiliation":[]},{"given":"V. F.","family":"Kazanin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Keeler","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kehoe","sequence":"additional","affiliation":[]},{"given":"J. S.","family":"Keller","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kellermann","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kelsey","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Kempster","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kendrick","sequence":"additional","affiliation":[]},{"given":"K. 
E.","family":"Kennedy","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kepka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kersten","sequence":"additional","affiliation":[]},{"given":"B. P.","family":"Ker\u0161evan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ketabchi Haghighat","sequence":"additional","affiliation":[]},{"given":"M.","family":"Khader","sequence":"additional","affiliation":[]},{"given":"F.","family":"Khalil-Zada","sequence":"additional","affiliation":[]},{"given":"M.","family":"Khandoga","sequence":"additional","affiliation":[]},{"given":"A.","family":"Khanov","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Kharlamov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kharlamova","sequence":"additional","affiliation":[]},{"given":"E. E.","family":"Khoda","sequence":"additional","affiliation":[]},{"given":"A.","family":"Khodinov","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Khoo","sequence":"additional","affiliation":[]},{"given":"E.","family":"Khramov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Khubua","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kido","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kiehn","sequence":"additional","affiliation":[]},{"given":"C. R.","family":"Kilby","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Y. K.","family":"Kim","sequence":"additional","affiliation":[]},{"given":"N.","family":"Kimura","sequence":"additional","affiliation":[]},{"given":"O. M.","family":"Kind","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"King","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kirchmeier","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kirk","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Kiryunin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kishimoto","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Kisliuk","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kitali","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kivernyk","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klapdor-Kleingrothaus","sequence":"additional","affiliation":[]},{"given":"M.","family":"Klassen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"M. H.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"M.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"U.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kleinknecht","sequence":"additional","affiliation":[]},{"given":"P.","family":"Klimek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Klimentov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klingl","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klioutchnikova","sequence":"additional","affiliation":[]},{"given":"F. F.","family":"Klitzner","sequence":"additional","affiliation":[]},{"given":"P.","family":"Kluit","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kluth","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kneringer","sequence":"additional","affiliation":[]},{"given":"E. B. F. 
G.","family":"Knoops","sequence":"additional","affiliation":[]},{"given":"A.","family":"Knue","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kobayashi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kobayashi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kobel","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kocian","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kodama","sequence":"additional","affiliation":[]},{"given":"P.","family":"Kodys","sequence":"additional","affiliation":[]},{"given":"P. T.","family":"Koenig","sequence":"additional","affiliation":[]},{"given":"T.","family":"Koffas","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"K\u00f6hler","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kolb","sequence":"additional","affiliation":[]},{"given":"I.","family":"Koletsou","sequence":"additional","affiliation":[]},{"given":"T.","family":"Komarek","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kondo","sequence":"additional","affiliation":[]},{"given":"K.","family":"K\u00f6neke","sequence":"additional","affiliation":[]},{"given":"A. X. Y.","family":"Kong","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"K\u00f6nig","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kono","sequence":"additional","affiliation":[]},{"given":"V.","family":"Konstantinides","sequence":"additional","affiliation":[]},{"given":"N.","family":"Konstantinidis","sequence":"additional","affiliation":[]},{"given":"B.","family":"Konya","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kopeliansky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Koperny","sequence":"additional","affiliation":[]},{"given":"K.","family":"Korcyl","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kordas","sequence":"additional","affiliation":[]},{"given":"G.","family":"Koren","sequence":"additional","affiliation":[]},{"given":"A.","family":"Korn","sequence":"additional","affiliation":[]},{"given":"I.","family":"Korolkov","sequence":"additional","affiliation":[]},{"given":"E. V.","family":"Korolkova","sequence":"additional","affiliation":[]},{"given":"N.","family":"Korotkova","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kortner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kortner","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kosek","sequence":"additional","affiliation":[]},{"given":"V. V.","family":"Kostyukhin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kotsokechagia","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kotwal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Koulouris","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kourkoumeli-Charalampidi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kourkoumelis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kourlitis","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kouskoura","sequence":"additional","affiliation":[]},{"given":"A. B.","family":"Kowalewska","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kowalewski","sequence":"additional","affiliation":[]},{"given":"W.","family":"Kozanecki","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Kozhin","sequence":"additional","affiliation":[]},{"given":"V. 
A.","family":"Kramarenko","sequence":"additional","affiliation":[]},{"given":"G.","family":"Kramberger","sequence":"additional","affiliation":[]},{"given":"D.","family":"Krasnopevtsev","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"Krasny","sequence":"additional","affiliation":[]},{"given":"A.","family":"Krasznahorkay","sequence":"additional","affiliation":[]},{"given":"D.","family":"Krauss","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Kremer","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kretzschmar","sequence":"additional","affiliation":[]},{"given":"P.","family":"Krieger","sequence":"additional","affiliation":[]},{"given":"F.","family":"Krieter","sequence":"additional","affiliation":[]},{"given":"A.","family":"Krishnan","sequence":"additional","affiliation":[]},{"given":"K.","family":"Krizka","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kroeninger","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kroha","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kroll","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kroll","sequence":"additional","affiliation":[]},{"given":"K. S.","family":"Krowpman","sequence":"additional","affiliation":[]},{"given":"U.","family":"Kruchonak","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kr\u00fcger","sequence":"additional","affiliation":[]},{"given":"N.","family":"Krumnack","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Kruse","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Krzysiak","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kubota","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kuchinskaia","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuday","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kuechler","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Kuechler","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuehn","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kugel","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kuhl","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kukhtin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kukla","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Kulchitsky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuleshov","sequence":"additional","affiliation":[]},{"given":"Y. P.","family":"Kulinich","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kuna","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kunigo","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kupco","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kupfer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kuprash","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kurashige","sequence":"additional","affiliation":[]},{"given":"L. L.","family":"Kurchaninov","sequence":"additional","affiliation":[]},{"given":"Y. A.","family":"Kurochkin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kurova","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Kurth","sequence":"additional","affiliation":[]},{"given":"E. 
S.","family":"Kuwertz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kuze","sequence":"additional","affiliation":[]},{"given":"A. K.","family":"Kvam","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kvita","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kwan","sequence":"additional","affiliation":[]},{"given":"L.","family":"La Rotonda","sequence":"additional","affiliation":[]},{"given":"F.","family":"La Ruffa","sequence":"additional","affiliation":[]},{"given":"C.","family":"Lacasta","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lacava","sequence":"additional","affiliation":[]},{"given":"D. P. J.","family":"Lack","sequence":"additional","affiliation":[]},{"given":"H.","family":"Lacker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lacour","sequence":"additional","affiliation":[]},{"given":"E.","family":"Ladygin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Lafaye","sequence":"additional","affiliation":[]},{"given":"B.","family":"Laforge","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lagouri","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lai","sequence":"additional","affiliation":[]},{"given":"I. K.","family":"Lakomiec","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lammers","sequence":"additional","affiliation":[]},{"given":"W.","family":"Lampl","sequence":"additional","affiliation":[]},{"given":"C.","family":"Lampoudis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lan\u00e7on","sequence":"additional","affiliation":[]},{"given":"U.","family":"Landgraf","sequence":"additional","affiliation":[]},{"given":"M. P. J.","family":"Landon","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Lanfermann","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Lang","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Lange","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Langenberg","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Lankford","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lanni","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lantzsch","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lanza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lapertosa","sequence":"additional","affiliation":[]},{"given":"S.","family":"Laplace","sequence":"additional","affiliation":[]},{"given":"J. F.","family":"Laporte","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lari","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lasagni Manghi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lassnig","sequence":"additional","affiliation":[]},{"given":"T. S.","family":"Lau","sequence":"additional","affiliation":[]},{"given":"A.","family":"Laudrain","sequence":"additional","affiliation":[]},{"given":"A.","family":"Laurier","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lavorgna","sequence":"additional","affiliation":[]},{"given":"S. 
D.","family":"Lawlor","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lazzaroni","sequence":"additional","affiliation":[]},{"given":"B.","family":"Le","sequence":"additional","affiliation":[]},{"given":"E.","family":"Le Guirriec","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lebedev","sequence":"additional","affiliation":[]},{"given":"M.","family":"LeBlanc","sequence":"additional","affiliation":[]},{"given":"T.","family":"LeCompte","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ledroit-Guillon","sequence":"additional","affiliation":[]},{"given":"A. C. A.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"G. R.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"L.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"S. C.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"B.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"H. P.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leggett","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lehmann","sequence":"additional","affiliation":[]},{"given":"N.","family":"Lehmann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Lehmann Miotto","sequence":"additional","affiliation":[]},{"given":"W. A.","family":"Leight","sequence":"additional","affiliation":[]},{"given":"A.","family":"Leisos","sequence":"additional","affiliation":[]},{"given":"M. A. L.","family":"Leite","sequence":"additional","affiliation":[]},{"given":"C. E.","family":"Leitgeb","sequence":"additional","affiliation":[]},{"given":"R.","family":"Leitner","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lellouch","sequence":"additional","affiliation":[]},{"given":"K. J. C.","family":"Leney","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lenz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Leone","sequence":"additional","affiliation":[]},{"given":"S.","family":"Leone","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leonidopoulos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Leopold","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leroy","sequence":"additional","affiliation":[]},{"given":"R.","family":"Les","sequence":"additional","affiliation":[]},{"given":"C. G.","family":"Lester","sequence":"additional","affiliation":[]},{"given":"M.","family":"Levchenko","sequence":"additional","affiliation":[]},{"given":"J.","family":"Lev\u00eaque","sequence":"additional","affiliation":[]},{"given":"D.","family":"Levin","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Levinson","sequence":"additional","affiliation":[]},{"given":"D. 
J.","family":"Lewis","sequence":"additional","affiliation":[]},{"given":"B.","family":"Li","sequence":"additional","affiliation":[]},{"given":"B.","family":"Li","sequence":"additional","affiliation":[]},{"given":"C-Q.","family":"Li","sequence":"additional","affiliation":[]},{"given":"F.","family":"Li","sequence":"additional","affiliation":[]},{"given":"H.","family":"Li","sequence":"additional","affiliation":[]},{"given":"H.","family":"Li","sequence":"additional","affiliation":[]},{"given":"J.","family":"Li","sequence":"additional","affiliation":[]},{"given":"K.","family":"Li","sequence":"additional","affiliation":[]},{"given":"L.","family":"Li","sequence":"additional","affiliation":[]},{"given":"M.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Q. Y.","family":"Li","sequence":"additional","affiliation":[]},{"given":"S.","family":"Li","sequence":"additional","affiliation":[]},{"given":"X.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Liang","sequence":"additional","affiliation":[]},{"given":"B.","family":"Liberti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Liblong","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lie","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lim","sequence":"additional","affiliation":[]},{"given":"C. Y.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"T. H.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Linck","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Lindon","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Lionti","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lipeles","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lipniacka","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Liss","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lister","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Little","sequence":"additional","affiliation":[]},{"given":"B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"B. X.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"H. B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"J. K. K.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"K.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M. Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"P.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y. L.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y. 
W.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Livan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lleres","sequence":"additional","affiliation":[]},{"given":"J.","family":"Llorente Merino","sequence":"additional","affiliation":[]},{"given":"S. L.","family":"Lloyd","sequence":"additional","affiliation":[]},{"given":"C. Y.","family":"Lo","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Lobodzinska","sequence":"additional","affiliation":[]},{"given":"P.","family":"Loch","sequence":"additional","affiliation":[]},{"given":"S.","family":"Loffredo","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lohse","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lohwasser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lokajicek","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Long","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Long","sequence":"additional","affiliation":[]},{"given":"L.","family":"Longo","sequence":"additional","affiliation":[]},{"given":"K. A.","family":"Looper","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Lopez","sequence":"additional","affiliation":[]},{"given":"I. Lopez","family":"Paz","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lopez Solis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Lorenz","sequence":"additional","affiliation":[]},{"given":"N.","family":"Lorenzo Martinez","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Lory","sequence":"additional","affiliation":[]},{"given":"M.","family":"Losada","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"L\u00f6sel","sequence":"additional","affiliation":[]},{"given":"A.","family":"L\u00f6sle","sequence":"additional","affiliation":[]},{"given":"X.","family":"Lou","sequence":"additional","affiliation":[]},{"given":"X.","family":"Lou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lounis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Love","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Love","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Lozano Bahilo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Y. J.","family":"Lu","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Lubatti","sequence":"additional","affiliation":[]},{"given":"C.","family":"Luci","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lucotte","sequence":"additional","affiliation":[]},{"given":"C.","family":"Luedtke","sequence":"additional","affiliation":[]},{"given":"F.","family":"Luehring","sequence":"additional","affiliation":[]},{"given":"I.","family":"Luise","sequence":"additional","affiliation":[]},{"given":"L.","family":"Luminari","sequence":"additional","affiliation":[]},{"given":"B.","family":"Lund-Jensen","sequence":"additional","affiliation":[]},{"given":"M. 
S.","family":"Lutz","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lynn","sequence":"additional","affiliation":[]},{"given":"H.","family":"Lyons","sequence":"additional","affiliation":[]},{"given":"R.","family":"Lysak","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lytken","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lyu","sequence":"additional","affiliation":[]},{"given":"V.","family":"Lyubushkin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lyubushkina","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"L. L.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"G.","family":"Maccarrone","sequence":"additional","affiliation":[]},{"given":"A.","family":"Macchiolo","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Macdonald","sequence":"additional","affiliation":[]},{"given":"J.","family":"Machado Miguens","sequence":"additional","affiliation":[]},{"given":"D.","family":"Madaffari","sequence":"additional","affiliation":[]},{"given":"R.","family":"Madar","sequence":"additional","affiliation":[]},{"given":"W. F.","family":"Mader","sequence":"additional","affiliation":[]},{"given":"M.","family":"Madugoda Ralalage Don","sequence":"additional","affiliation":[]},{"given":"N.","family":"Madysa","sequence":"additional","affiliation":[]},{"given":"J.","family":"Maeda","sequence":"additional","affiliation":[]},{"given":"T.","family":"Maeno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Maerker","sequence":"additional","affiliation":[]},{"given":"V.","family":"Magerl","sequence":"additional","affiliation":[]},{"given":"N.","family":"Magini","sequence":"additional","affiliation":[]},{"given":"J.","family":"Magro","sequence":"additional","affiliation":[]},{"given":"D. J.","family":"Mahon","sequence":"additional","affiliation":[]},{"given":"C.","family":"Maidantchik","sequence":"additional","affiliation":[]},{"given":"T.","family":"Maier","sequence":"additional","affiliation":[]},{"given":"A.","family":"Maio","sequence":"additional","affiliation":[]},{"given":"K.","family":"Maj","sequence":"additional","affiliation":[]},{"given":"O.","family":"Majersky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Majewski","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Makida","sequence":"additional","affiliation":[]},{"given":"N.","family":"Makovec","sequence":"additional","affiliation":[]},{"given":"B.","family":"Malaescu","sequence":"additional","affiliation":[]},{"given":"Pa.","family":"Malecki","sequence":"additional","affiliation":[]},{"given":"V. 
P.","family":"Maleev","sequence":"additional","affiliation":[]},{"given":"F.","family":"Malek","sequence":"additional","affiliation":[]},{"given":"U.","family":"Mallik","sequence":"additional","affiliation":[]},{"given":"D.","family":"Malon","sequence":"additional","affiliation":[]},{"given":"C.","family":"Malone","sequence":"additional","affiliation":[]},{"given":"S.","family":"Maltezos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Malyukov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mamuzic","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mancini","sequence":"additional","affiliation":[]},{"given":"I.","family":"Mandi\u0107","sequence":"additional","affiliation":[]},{"given":"L.","family":"Manhaes de Andrade Filho","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Maniatis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Manjarres Ramos","sequence":"additional","affiliation":[]},{"given":"K. H.","family":"Mankinen","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mann","sequence":"additional","affiliation":[]},{"given":"A.","family":"Manousos","sequence":"additional","affiliation":[]},{"given":"B.","family":"Mansoulie","sequence":"additional","affiliation":[]},{"given":"I.","family":"Manthos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Manzoni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Marantis","sequence":"additional","affiliation":[]},{"given":"G.","family":"Marceca","sequence":"additional","affiliation":[]},{"given":"L.","family":"Marchese","sequence":"additional","affiliation":[]},{"given":"G.","family":"Marchiori","sequence":"additional","affiliation":[]},{"given":"M.","family":"Marcisovsky","sequence":"additional","affiliation":[]},{"given":"L.","family":"Marcoccia","sequence":"additional","affiliation":[]},{"given":"C.","family":"Marcon","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Marin Tobon","sequence":"additional","affiliation":[]},{"given":"M.","family":"Marjanovic","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Marshall","sequence":"additional","affiliation":[]},{"given":"M. U. F.","family":"Martensson","sequence":"additional","affiliation":[]},{"given":"S.","family":"Marti-Garcia","sequence":"additional","affiliation":[]},{"given":"C. B.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"V. J.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"B.","family":"Martin dit Latour","sequence":"additional","affiliation":[]},{"given":"L.","family":"Martinelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Martinez","sequence":"additional","affiliation":[]},{"given":"V. I.","family":"Martinez Outschoorn","sequence":"additional","affiliation":[]},{"given":"S.","family":"Martin-Haugh","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Martoiu","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Martyniuk","sequence":"additional","affiliation":[]},{"given":"A.","family":"Marzin","sequence":"additional","affiliation":[]},{"given":"S. 
R.","family":"Maschek","sequence":"additional","affiliation":[]},{"given":"L.","family":"Masetti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mashimo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Mashinistov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Masik","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Maslennikov","sequence":"additional","affiliation":[]},{"given":"L.","family":"Massa","sequence":"additional","affiliation":[]},{"given":"P.","family":"Massarotti","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mastrandrea","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mastroberardino","sequence":"additional","affiliation":[]},{"given":"T.","family":"Masubuchi","sequence":"additional","affiliation":[]},{"given":"D.","family":"Matakias","sequence":"additional","affiliation":[]},{"given":"A.","family":"Matic","sequence":"additional","affiliation":[]},{"given":"N.","family":"Matsuzawa","sequence":"additional","affiliation":[]},{"given":"P.","family":"M\u00e4ttig","sequence":"additional","affiliation":[]},{"given":"J.","family":"Maurer","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ma\u010dek","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Maximov","sequence":"additional","affiliation":[]},{"given":"R.","family":"Mazini","sequence":"additional","affiliation":[]},{"given":"I.","family":"Maznas","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Mazza","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Mc Kee","sequence":"additional","affiliation":[]},{"given":"T. G.","family":"McCarthy","sequence":"additional","affiliation":[]},{"given":"W. P.","family":"McCormack","sequence":"additional","affiliation":[]},{"given":"E. F.","family":"McDonald","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Mcfayden","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mchedlidze","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"McKay","sequence":"additional","affiliation":[]},{"given":"K. D.","family":"McLean","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"McMahon","sequence":"additional","affiliation":[]},{"given":"P. C.","family":"McNamara","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"McNicol","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"McPherson","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Mdhluli","sequence":"additional","affiliation":[]},{"given":"Z. A.","family":"Meadows","sequence":"additional","affiliation":[]},{"given":"S.","family":"Meehan","sequence":"additional","affiliation":[]},{"given":"T.","family":"Megy","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mehlhase","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mehta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Meideck","sequence":"additional","affiliation":[]},{"given":"B.","family":"Meirose","sequence":"additional","affiliation":[]},{"given":"D.","family":"Melini","sequence":"additional","affiliation":[]},{"given":"B. R.","family":"Mellado Garcia","sequence":"additional","affiliation":[]},{"given":"J. 
D.","family":"Mellenthin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Melo","sequence":"additional","affiliation":[]},{"given":"F.","family":"Meloni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Melzer","sequence":"additional","affiliation":[]},{"given":"S. B.","family":"Menary","sequence":"additional","affiliation":[]},{"given":"E. D.","family":"Mendes Gouveia","sequence":"additional","affiliation":[]},{"given":"L.","family":"Meng","sequence":"additional","affiliation":[]},{"given":"X. T.","family":"Meng","sequence":"additional","affiliation":[]},{"given":"S.","family":"Menke","sequence":"additional","affiliation":[]},{"given":"E.","family":"Meoni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mergelmeyer","sequence":"additional","affiliation":[]},{"given":"S. A. M.","family":"Merkt","sequence":"additional","affiliation":[]},{"given":"C.","family":"Merlassino","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mermod","sequence":"additional","affiliation":[]},{"given":"L.","family":"Merola","sequence":"additional","affiliation":[]},{"given":"C.","family":"Meroni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Merz","sequence":"additional","affiliation":[]},{"given":"O.","family":"Meshkov","sequence":"additional","affiliation":[]},{"given":"J. K. R.","family":"Meshreki","sequence":"additional","affiliation":[]},{"given":"A.","family":"Messina","sequence":"additional","affiliation":[]},{"given":"J.","family":"Metcalfe","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Mete","sequence":"additional","affiliation":[]},{"given":"C.","family":"Meyer","sequence":"additional","affiliation":[]},{"given":"J-P.","family":"Meyer","sequence":"additional","affiliation":[]},{"given":"H.","family":"Meyer Zu Theenhausen","sequence":"additional","affiliation":[]},{"given":"F.","family":"Miano","sequence":"additional","affiliation":[]},{"given":"M.","family":"Michetti","sequence":"additional","affiliation":[]},{"given":"R. P.","family":"Middleton","sequence":"additional","affiliation":[]},{"given":"L.","family":"Mijovi\u0107","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mikenberg","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mikestikova","sequence":"additional","affiliation":[]},{"given":"M.","family":"Miku\u017e","sequence":"additional","affiliation":[]},{"given":"H.","family":"Mildner","sequence":"additional","affiliation":[]},{"given":"M.","family":"Milesi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Milic","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Milke","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Millar","sequence":"additional","affiliation":[]},{"given":"D. W.","family":"Miller","sequence":"additional","affiliation":[]},{"given":"A.","family":"Milov","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Milstead","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Mina","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Minaenko","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mi\u00f1ano Moya","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Minashvili","sequence":"additional","affiliation":[]},{"given":"A. 
I.","family":"Mincer","sequence":"additional","affiliation":[]},{"given":"B.","family":"Mindur","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mineev","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Minegishi","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Mir","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mirto","sequence":"additional","affiliation":[]},{"given":"K. P.","family":"Mistry","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mitani","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mitrevski","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Mitsou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mittal","sequence":"additional","affiliation":[]},{"given":"O.","family":"Miu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Miucci","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Miyagawa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mizukami","sequence":"additional","affiliation":[]},{"given":"J. U.","family":"Mj\u00f6rnmark","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mkrtchyan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mlynarikova","sequence":"additional","affiliation":[]},{"given":"T.","family":"Moa","sequence":"additional","affiliation":[]},{"given":"K.","family":"Mochizuki","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mogg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mohapatra","sequence":"additional","affiliation":[]},{"given":"R.","family":"Moles-Valls","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Mondragon","sequence":"additional","affiliation":[]},{"given":"K.","family":"M\u00f6nig","sequence":"additional","affiliation":[]},{"given":"J.","family":"Monk","sequence":"additional","affiliation":[]},{"given":"E.","family":"Monnier","sequence":"additional","affiliation":[]},{"given":"A.","family":"Montalbano","sequence":"additional","affiliation":[]},{"given":"J.","family":"Montejo Berlingen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Montella","sequence":"additional","affiliation":[]},{"given":"F.","family":"Monticelli","sequence":"additional","affiliation":[]},{"given":"N.","family":"Morange","sequence":"additional","affiliation":[]},{"given":"D.","family":"Moreno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Moreno Ll\u00e1cer","sequence":"additional","affiliation":[]},{"given":"C.","family":"Moreno Martinez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Morettini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morgenstern","sequence":"additional","affiliation":[]},{"given":"S.","family":"Morgenstern","sequence":"additional","affiliation":[]},{"given":"D.","family":"Mori","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morii","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morinaga","sequence":"additional","affiliation":[]},{"given":"V.","family":"Morisbak","sequence":"additional","affiliation":[]},{"given":"A. K.","family":"Morley","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mornacchi","sequence":"additional","affiliation":[]},{"given":"A. 
P.","family":"Morris","sequence":"additional","affiliation":[]},{"given":"L.","family":"Morvaj","sequence":"additional","affiliation":[]},{"given":"P.","family":"Moschovakos","sequence":"additional","affiliation":[]},{"given":"B.","family":"Moser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mosidze","sequence":"additional","affiliation":[]},{"given":"T.","family":"Moskalets","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Moss","sequence":"additional","affiliation":[]},{"given":"J.","family":"Moss","sequence":"additional","affiliation":[]},{"given":"E. J. W.","family":"Moyse","sequence":"additional","affiliation":[]},{"given":"S.","family":"Muanza","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mueller","sequence":"additional","affiliation":[]},{"given":"R. S. P.","family":"Mueller","sequence":"additional","affiliation":[]},{"given":"D.","family":"Muenstermann","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Mullier","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Mungo","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Munoz Martinez","sequence":"additional","affiliation":[]},{"given":"F. J.","family":"Munoz Sanchez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Murin","sequence":"additional","affiliation":[]},{"given":"W. J.","family":"Murray","sequence":"additional","affiliation":[]},{"given":"A.","family":"Murrone","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mu\u0161kinja","sequence":"additional","affiliation":[]},{"given":"kinja C.","family":"Mwewa","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Myagkov","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Myers","sequence":"additional","affiliation":[]},{"given":"J.","family":"Myers","sequence":"additional","affiliation":[]},{"given":"M.","family":"Myska","sequence":"additional","affiliation":[]},{"given":"B. P.","family":"Nachman","sequence":"additional","affiliation":[]},{"given":"O.","family":"Nackenhorst","sequence":"additional","affiliation":[]},{"given":"A. Nag","family":"Nag","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nagai","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nagano","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Nagasaka","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Nagle","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nagy","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Nairz","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Nakahama","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nakamura","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nakamura","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nakano","sequence":"additional","affiliation":[]},{"given":"H.","family":"Nanjo","sequence":"additional","affiliation":[]},{"given":"F.","family":"Napolitano","sequence":"additional","affiliation":[]},{"given":"R. F.","family":"Naranjo Garcia","sequence":"additional","affiliation":[]},{"given":"R.","family":"Narayan","sequence":"additional","affiliation":[]},{"given":"I.","family":"Naryshkin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Naumann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Navarro","sequence":"additional","affiliation":[]},{"given":"P. 
Y.","family":"Nechaeva","sequence":"additional","affiliation":[]},{"given":"F.","family":"Nechansky","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Neep","sequence":"additional","affiliation":[]},{"given":"A.","family":"Negri","sequence":"additional","affiliation":[]},{"given":"M.","family":"Negrini","sequence":"additional","affiliation":[]},{"given":"C.","family":"Nellist","sequence":"additional","affiliation":[]},{"given":"M. E.","family":"Nelson","sequence":"additional","affiliation":[]},{"given":"S.","family":"Nemecek","sequence":"additional","affiliation":[]},{"given":"M.","family":"Nessi","sequence":"additional","affiliation":[]},{"given":"M. S.","family":"Neubauer","sequence":"additional","affiliation":[]},{"given":"F.","family":"Neuhaus","sequence":"additional","affiliation":[]},{"given":"M.","family":"Neumann","sequence":"additional","affiliation":[]},{"given":"R.","family":"Newhouse","sequence":"additional","affiliation":[]},{"given":"P. R.","family":"Newman","sequence":"additional","affiliation":[]},{"given":"C. W.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Y. S.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Y. W. Y.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ngair","sequence":"additional","affiliation":[]},{"given":"H. D. N.","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nguyen Manh","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nibigira","sequence":"additional","affiliation":[]},{"given":"R. B.","family":"Nickerson","sequence":"additional","affiliation":[]},{"given":"R.","family":"Nicolaidou","sequence":"additional","affiliation":[]},{"given":"D. S.","family":"Nielsen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Nielsen","sequence":"additional","affiliation":[]},{"given":"N.","family":"Nikiforou","sequence":"additional","affiliation":[]},{"given":"V.","family":"Nikolaenko","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nikolic-Audit","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nikolopoulos","sequence":"additional","affiliation":[]},{"given":"P.","family":"Nilsson","sequence":"additional","affiliation":[]},{"given":"H. R.","family":"Nindhito","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ninomiya","sequence":"additional","affiliation":[]},{"given":"A.","family":"Nisati","sequence":"additional","affiliation":[]},{"given":"N.","family":"Nishu","sequence":"additional","affiliation":[]},{"given":"R.","family":"Nisius","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nitsche","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nitta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nobe","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Noguchi","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nomidis","sequence":"additional","affiliation":[]},{"given":"M. 
A.","family":"Nomura","sequence":"additional","affiliation":[]},{"given":"M.","family":"Nordberg","sequence":"additional","affiliation":[]},{"given":"T.","family":"Novak","sequence":"additional","affiliation":[]},{"given":"O.","family":"Novgorodova","sequence":"additional","affiliation":[]},{"given":"R.","family":"Novotny","sequence":"additional","affiliation":[]},{"given":"L.","family":"Nozka","sequence":"additional","affiliation":[]},{"given":"K.","family":"Ntekas","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nurse","sequence":"additional","affiliation":[]},{"given":"F. G.","family":"Oakham","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oberlack","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ocariz","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ochi","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ochoa","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Ochoa-Ricoux","sequence":"additional","affiliation":[]},{"given":"K.","family":"O\u2019Connor","sequence":"additional","affiliation":[]},{"given":"S.","family":"Oda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Odaka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Oerdek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ogrodnik","sequence":"additional","affiliation":[]},{"given":"A.","family":"Oh","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Oh","sequence":"additional","affiliation":[]},{"given":"C. C.","family":"Ohm","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oide","sequence":"additional","affiliation":[]},{"given":"M. L.","family":"Ojeda","sequence":"additional","affiliation":[]},{"given":"H.","family":"Okawa","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Okazaki","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"O\u2019Keefe","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Okumura","sequence":"additional","affiliation":[]},{"given":"T.","family":"Okuyama","sequence":"additional","affiliation":[]},{"given":"A.","family":"Olariu","sequence":"additional","affiliation":[]},{"given":"L. F.","family":"Oleiro Seabra","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Olivares Pino","sequence":"additional","affiliation":[]},{"given":"D.","family":"Oliveira Damazio","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Oliver","sequence":"additional","affiliation":[]},{"given":"M. J. R.","family":"Olsson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Olszewski","sequence":"additional","affiliation":[]},{"given":"J.","family":"Olszowska","sequence":"additional","affiliation":[]},{"given":"D. C.","family":"O\u2019Neil","sequence":"additional","affiliation":[]},{"given":"A. P.","family":"O\u2019neill","sequence":"additional","affiliation":[]},{"given":"A.","family":"Onofre","sequence":"additional","affiliation":[]},{"given":"P. U. E.","family":"Onyisi","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oppen","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Oreglia","sequence":"additional","affiliation":[]},{"given":"G. E.","family":"Orellana","sequence":"additional","affiliation":[]},{"given":"D.","family":"Orestano","sequence":"additional","affiliation":[]},{"given":"N.","family":"Orlando","sequence":"additional","affiliation":[]},{"given":"R. 
S.","family":"Orr","sequence":"additional","affiliation":[]},{"given":"V.","family":"O\u2019Shea","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ospanov","sequence":"additional","affiliation":[]},{"given":"G.","family":"Otero y Garzon","sequence":"additional","affiliation":[]},{"given":"H.","family":"Otono","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Ott","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ouchrif","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ouellette","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ould-Saada","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ouraou","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Ouyang","sequence":"additional","affiliation":[]},{"given":"M.","family":"Owen","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Owen","sequence":"additional","affiliation":[]},{"given":"V. E.","family":"Ozcan","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ozturk","sequence":"additional","affiliation":[]},{"given":"J.","family":"Pacalt","sequence":"additional","affiliation":[]},{"given":"H. A.","family":"Pacey","sequence":"additional","affiliation":[]},{"given":"K.","family":"Pachal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Pacheco Pages","sequence":"additional","affiliation":[]},{"given":"C.","family":"Padilla Aranda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pagan Griso","sequence":"additional","affiliation":[]},{"given":"M.","family":"Paganini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Palacino","sequence":"additional","affiliation":[]},{"given":"S.","family":"Palazzo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Palestini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Palka","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pallin","sequence":"additional","affiliation":[]},{"given":"P.","family":"Palni","sequence":"additional","affiliation":[]},{"given":"I.","family":"Panagoulias","sequence":"additional","affiliation":[]},{"given":"C. E.","family":"Pandini","sequence":"additional","affiliation":[]},{"given":"J. G.","family":"Panduro Vazquez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Panizzo","sequence":"additional","affiliation":[]},{"given":"L.","family":"Paolozzi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Papadatos","sequence":"additional","affiliation":[]},{"given":"K.","family":"Papageorgiou","sequence":"additional","affiliation":[]},{"given":"S.","family":"Parajuli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Paramonov","sequence":"additional","affiliation":[]},{"given":"D.","family":"Paredes Hernandez","sequence":"additional","affiliation":[]},{"given":"S. R.","family":"Paredes Saenz","sequence":"additional","affiliation":[]},{"given":"B.","family":"Parida","sequence":"additional","affiliation":[]},{"given":"T. H.","family":"Park","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Parker","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Parker","sequence":"additional","affiliation":[]},{"given":"F.","family":"Parodi","sequence":"additional","affiliation":[]},{"given":"E. W.","family":"Parrish","sequence":"additional","affiliation":[]},{"given":"J. 
A.","family":"Parsons","sequence":"additional","affiliation":[]},{"given":"U.","family":"Parzefall","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pascual Dominguez","sequence":"additional","affiliation":[]},{"given":"V. R.","family":"Pascuzzi","sequence":"additional","affiliation":[]},{"given":"J. M. P.","family":"Pasner","sequence":"additional","affiliation":[]},{"given":"F.","family":"Pasquali","sequence":"additional","affiliation":[]},{"given":"E.","family":"Pasqualucci","sequence":"additional","affiliation":[]},{"given":"S.","family":"Passaggio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Pastore","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pasuwan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pataraia","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Pater","sequence":"additional","affiliation":[]},{"given":"A.","family":"Pathak","sequence":"additional","affiliation":[]},{"given":"J.","family":"Patton","sequence":"additional","affiliation":[]},{"given":"T.","family":"Pauly","sequence":"additional","affiliation":[]},{"given":"J.","family":"Pearkes","sequence":"additional","affiliation":[]},{"given":"B.","family":"Pearson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pedersen","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pedraza Diaz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Pedro","sequence":"additional","affiliation":[]},{"given":"T.","family":"Peiffer","sequence":"additional","affiliation":[]},{"given":"S. V.","family":"Peleganchuk","sequence":"additional","affiliation":[]},{"given":"O.","family":"Penc","sequence":"additional","affiliation":[]},{"given":"H.","family":"Peng","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Peralva","sequence":"additional","affiliation":[]},{"given":"M. M.","family":"Perego","sequence":"additional","affiliation":[]},{"given":"A. P.","family":"Pereira Peixoto","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pereira Sanchez","sequence":"additional","affiliation":[]},{"given":"D. V.","family":"Perepelitsa","sequence":"additional","affiliation":[]},{"given":"F.","family":"Peri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Perini","sequence":"additional","affiliation":[]},{"given":"H.","family":"Pernegger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Perrella","sequence":"additional","affiliation":[]},{"given":"A.","family":"Perrevoort","sequence":"additional","affiliation":[]},{"given":"K.","family":"Peters","sequence":"additional","affiliation":[]},{"given":"R. F. Y.","family":"Peters","sequence":"additional","affiliation":[]},{"given":"B. A.","family":"Petersen","sequence":"additional","affiliation":[]},{"given":"T. C.","family":"Petersen","sequence":"additional","affiliation":[]},{"given":"E.","family":"Petit","sequence":"additional","affiliation":[]},{"given":"A.","family":"Petridis","sequence":"additional","affiliation":[]},{"given":"C.","family":"Petridou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Petrov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Petrucci","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pettee","sequence":"additional","affiliation":[]},{"given":"N. 
E.","family":"Pettersson","sequence":"additional","affiliation":[]},{"given":"K.","family":"Petukhova","sequence":"additional","affiliation":[]},{"given":"A.","family":"Peyaud","sequence":"additional","affiliation":[]},{"given":"R.","family":"Pezoa","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pezzotti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Pham","sequence":"additional","affiliation":[]},{"given":"F. H.","family":"Phillips","sequence":"additional","affiliation":[]},{"given":"P. W.","family":"Phillips","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"Phipps","sequence":"additional","affiliation":[]},{"given":"G.","family":"Piacquadio","sequence":"additional","affiliation":[]},{"given":"E.","family":"Pianori","sequence":"additional","affiliation":[]},{"given":"A.","family":"Picazio","sequence":"additional","affiliation":[]},{"given":"R. H.","family":"Pickles","sequence":"additional","affiliation":[]},{"given":"R.","family":"Piegaia","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pietreanu","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Pilcher","sequence":"additional","affiliation":[]},{"given":"A. D.","family":"Pilkington","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pinamonti","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Pinfold","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pitt","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pizzimento","sequence":"additional","affiliation":[]},{"given":"M.-A.","family":"Pleier","sequence":"additional","affiliation":[]},{"given":"V.","family":"Pleskot","sequence":"additional","affiliation":[]},{"given":"E.","family":"Plotnikova","sequence":"additional","affiliation":[]},{"given":"P.","family":"Podberezko","sequence":"additional","affiliation":[]},{"given":"R.","family":"Poettgen","sequence":"additional","affiliation":[]},{"given":"R.","family":"Poggi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Poggioli","sequence":"additional","affiliation":[]},{"given":"I.","family":"Pogrebnyak","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pohl","sequence":"additional","affiliation":[]},{"given":"I.","family":"Pokharel","sequence":"additional","affiliation":[]},{"given":"G.","family":"Polesello","sequence":"additional","affiliation":[]},{"given":"A.","family":"Poley","sequence":"additional","affiliation":[]},{"given":"A.","family":"Policicchio","sequence":"additional","affiliation":[]},{"given":"R.","family":"Polifka","sequence":"additional","affiliation":[]},{"given":"A.","family":"Polini","sequence":"additional","affiliation":[]},{"given":"C. S.","family":"Pollard","sequence":"additional","affiliation":[]},{"given":"V.","family":"Polychronakos","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ponomarenko","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pontecorvo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Popa","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Popeneciu","sequence":"additional","affiliation":[]},{"given":"L.","family":"Portales","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Portillo Quintero","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pospisil","sequence":"additional","affiliation":[]},{"given":"K.","family":"Potamianos","sequence":"additional","affiliation":[]},{"given":"I. 
N.","family":"Potrap","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Potter","sequence":"additional","affiliation":[]},{"given":"H.","family":"Potti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Poulsen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Poveda","sequence":"additional","affiliation":[]},{"given":"T. D.","family":"Powell","sequence":"additional","affiliation":[]},{"given":"G.","family":"Pownall","sequence":"additional","affiliation":[]},{"given":"M. E.","family":"Pozo Astigarraga","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pralavorio","sequence":"additional","affiliation":[]},{"given":"S.","family":"Prell","sequence":"additional","affiliation":[]},{"given":"D.","family":"Price","sequence":"additional","affiliation":[]},{"given":"M.","family":"Primavera","sequence":"additional","affiliation":[]},{"given":"S.","family":"Prince","sequence":"additional","affiliation":[]},{"given":"M. L.","family":"Proffitt","sequence":"additional","affiliation":[]},{"given":"N.","family":"Proklova","sequence":"additional","affiliation":[]},{"given":"K.","family":"Prokofiev","sequence":"additional","affiliation":[]},{"given":"F.","family":"Prokoshin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Protopopescu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Proudfoot","sequence":"additional","affiliation":[]},{"given":"M.","family":"Przybycien","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pudzha","sequence":"additional","affiliation":[]},{"given":"A.","family":"Puri","sequence":"additional","affiliation":[]},{"given":"P.","family":"Puzo","sequence":"additional","affiliation":[]},{"given":"J.","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Qin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Quadt","sequence":"additional","affiliation":[]},{"given":"M.","family":"Queitsch-Maitland","sequence":"additional","affiliation":[]},{"given":"A.","family":"Qureshi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Racko","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ragusa","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rahal","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Raine","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rajagopalan","sequence":"additional","affiliation":[]},{"given":"A. Ramirez","family":"Morales","sequence":"additional","affiliation":[]},{"given":"K.","family":"Ran","sequence":"additional","affiliation":[]},{"given":"T.","family":"Rashid","sequence":"additional","affiliation":[]},{"given":"S.","family":"Raspopov","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Rauch","sequence":"additional","affiliation":[]},{"given":"F.","family":"Rauscher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rave","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ravina","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ravinovich","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Rawling","sequence":"additional","affiliation":[]},{"given":"M.","family":"Raymond","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Read","sequence":"additional","affiliation":[]},{"given":"N. 
P.","family":"Readioff","sequence":"additional","affiliation":[]},{"given":"M.","family":"Reale","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Rebuzzi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Redelbach","sequence":"additional","affiliation":[]},{"given":"G.","family":"Redlinger","sequence":"additional","affiliation":[]},{"given":"K.","family":"Reeves","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rehnisch","sequence":"additional","affiliation":[]},{"given":"J.","family":"Reichert","sequence":"additional","affiliation":[]},{"given":"D.","family":"Reikher","sequence":"additional","affiliation":[]},{"given":"A.","family":"Reiss","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rej","sequence":"additional","affiliation":[]},{"given":"C.","family":"Rembser","sequence":"additional","affiliation":[]},{"given":"A.","family":"Renardi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Renda","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rescigno","sequence":"additional","affiliation":[]},{"given":"S.","family":"Resconi","sequence":"additional","affiliation":[]},{"given":"E. D.","family":"Resseguie","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rettie","sequence":"additional","affiliation":[]},{"given":"B.","family":"Reynolds","sequence":"additional","affiliation":[]},{"given":"E.","family":"Reynolds","sequence":"additional","affiliation":[]},{"given":"O. L.","family":"Rezanova","sequence":"additional","affiliation":[]},{"given":"P.","family":"Reznicek","sequence":"additional","affiliation":[]},{"given":"E.","family":"Ricci","sequence":"additional","affiliation":[]},{"given":"R.","family":"Richter","sequence":"additional","affiliation":[]},{"given":"S.","family":"Richter","sequence":"additional","affiliation":[]},{"given":"E.","family":"Richter-Was","sequence":"additional","affiliation":[]},{"given":"O.","family":"Ricken","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ridel","sequence":"additional","affiliation":[]},{"given":"P.","family":"Rieck","sequence":"additional","affiliation":[]},{"given":"O.","family":"Rifki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rijssenbeek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rimoldi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rimoldi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rinaldi","sequence":"additional","affiliation":[]},{"given":"G.","family":"Ripellino","sequence":"additional","affiliation":[]},{"given":"I.","family":"Riu","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Rivera Vergara","sequence":"additional","affiliation":[]},{"given":"F.","family":"Rizatdinova","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rizvi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Rizzi","sequence":"additional","affiliation":[]},{"given":"R. T.","family":"Roberts","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Robertson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Robin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Robinson","sequence":"additional","affiliation":[]},{"given":"C. 
M.","family":"Robles Gajardo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Robles Manzano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Robson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rocchi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rocco","sequence":"additional","affiliation":[]},{"given":"C.","family":"Roda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rodriguez Bosca","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rodriguez Perez","sequence":"additional","affiliation":[]},{"given":"D.","family":"Rodriguez Rodriguez","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Rodr\u00edguez Vera","sequence":"additional","affiliation":[]},{"given":"S.","family":"Roe","sequence":"additional","affiliation":[]},{"given":"O.","family":"R\u00f8hne","sequence":"additional","affiliation":[]},{"given":"R.","family":"R\u00f6hrig","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Rojas","sequence":"additional","affiliation":[]},{"given":"B.","family":"Roland","sequence":"additional","affiliation":[]},{"given":"C. P. A.","family":"Roland","sequence":"additional","affiliation":[]},{"given":"J.","family":"Roloff","sequence":"additional","affiliation":[]},{"given":"A.","family":"Romaniouk","sequence":"additional","affiliation":[]},{"given":"M.","family":"Romano","sequence":"additional","affiliation":[]},{"given":"N.","family":"Rompotis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ronzani","sequence":"additional","affiliation":[]},{"given":"L.","family":"Roos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rosati","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rosin","sequence":"additional","affiliation":[]},{"given":"B. J.","family":"Rosser","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"L. P.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rossini","sequence":"additional","affiliation":[]},{"given":"R.","family":"Rosten","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rotaru","sequence":"additional","affiliation":[]},{"given":"J.","family":"Rothberg","sequence":"additional","affiliation":[]},{"given":"B.","family":"Rottler","sequence":"additional","affiliation":[]},{"given":"D.","family":"Rousseau","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rovelli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Roy","sequence":"additional","affiliation":[]},{"given":"D.","family":"Roy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rozanov","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Rozen","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ruan","sequence":"additional","affiliation":[]},{"given":"F.","family":"R\u00fchr","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ruiz-Martinez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rummler","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Rurikova","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Rusakovich","sequence":"additional","affiliation":[]},{"given":"H. 
L.","family":"Russell","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rustige","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Rutherfoord","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"R\u00fcttinger","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rybar","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rybkin","sequence":"additional","affiliation":[]},{"given":"E. B.","family":"Rye","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ryzhov","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Sabater Iglesias","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sabatini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sabato","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sacerdoti","sequence":"additional","affiliation":[]},{"given":"H. F-W.","family":"Sadrozinski","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sadykov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Safai Tehrani","sequence":"additional","affiliation":[]},{"given":"B.","family":"Safarzadeh Samani","sequence":"additional","affiliation":[]},{"given":"M.","family":"Safdari","sequence":"additional","affiliation":[]},{"given":"P.","family":"Saha","sequence":"additional","affiliation":[]},{"given":"S.","family":"Saha","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sahinsoy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sahu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Saimpert","sequence":"additional","affiliation":[]},{"given":"M.","family":"Saito","sequence":"additional","affiliation":[]},{"given":"T.","family":"Saito","sequence":"additional","affiliation":[]},{"given":"H.","family":"Sakamoto","sequence":"additional","affiliation":[]},{"given":"D.","family":"Salamani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Salamanna","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Salazar Loyola","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salnikov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Salt","sequence":"additional","affiliation":[]},{"given":"D.","family":"Salvatore","sequence":"additional","affiliation":[]},{"given":"F.","family":"Salvatore","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salvucci","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salzburger","sequence":"additional","affiliation":[]},{"given":"J.","family":"Samarati","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sammel","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sampsonidis","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sampsonidou","sequence":"additional","affiliation":[]},{"given":"J.","family":"S\u00e1nchez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sanchez Pineda","sequence":"additional","affiliation":[]},{"given":"H.","family":"Sandaker","sequence":"additional","affiliation":[]},{"given":"C. O.","family":"Sander","sequence":"additional","affiliation":[]},{"given":"I. G.","family":"Sanderswood","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sandhoff","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sandoval","sequence":"additional","affiliation":[]},{"given":"D. P. 
C.","family":"Sankey","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sannino","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Sano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sansoni","sequence":"additional","affiliation":[]},{"given":"C.","family":"Santoni","sequence":"additional","affiliation":[]},{"given":"H.","family":"Santos","sequence":"additional","affiliation":[]},{"given":"S. N.","family":"Santpur","sequence":"additional","affiliation":[]},{"given":"A.","family":"Santra","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sapronov","sequence":"additional","affiliation":[]},{"given":"J. G.","family":"Saraiva","sequence":"additional","affiliation":[]},{"given":"J.","family":"Sardain","sequence":"additional","affiliation":[]},{"given":"O.","family":"Sasaki","sequence":"additional","affiliation":[]},{"given":"K.","family":"Sato","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sauerburger","sequence":"additional","affiliation":[]},{"given":"E.","family":"Sauvan","sequence":"additional","affiliation":[]},{"given":"P.","family":"Savard","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sawada","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sawyer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Sawyer","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sbarra","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sbrizzi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Scanlon","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schaarschmidt","sequence":"additional","affiliation":[]},{"given":"P.","family":"Schacht","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Schachtner","sequence":"additional","affiliation":[]},{"given":"D.","family":"Schaefer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Schaefer","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schaeffer","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schaepe","sequence":"additional","affiliation":[]},{"given":"U.","family":"Sch\u00e4fer","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Schaffer","sequence":"additional","affiliation":[]},{"given":"D.","family":"Schaile","sequence":"additional","affiliation":[]},{"given":"R. D.","family":"Schamberger","sequence":"additional","affiliation":[]},{"given":"N.","family":"Scharmberg","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Schegelsky","sequence":"additional","affiliation":[]},{"given":"D.","family":"Scheirich","sequence":"additional","affiliation":[]},{"given":"F.","family":"Schenck","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schernau","sequence":"additional","affiliation":[]},{"given":"C.","family":"Schiavi","sequence":"additional","affiliation":[]},{"given":"L. K.","family":"Schildgen","sequence":"additional","affiliation":[]},{"given":"Z. M.","family":"Schillaci","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Schioppa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schioppa","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Schleicher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schlenker","sequence":"additional","affiliation":[]},{"given":"K. 
R.","family":"Schmidt-Sommerfeld","sequence":"additional","affiliation":[]},{"given":"K.","family":"Schmieden","sequence":"additional","affiliation":[]},{"given":"C.","family":"Schmitt","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schmitt","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schmitz","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Schmoeckel","sequence":"additional","affiliation":[]},{"given":"L.","family":"Schoeffel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schoening","sequence":"additional","affiliation":[]},{"given":"P. G.","family":"Scholer","sequence":"additional","affiliation":[]},{"given":"E.","family":"Schopf","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schott","sequence":"additional","affiliation":[]},{"given":"J. F. P.","family":"Schouwenberg","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schovancova","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schramm","sequence":"additional","affiliation":[]},{"given":"F.","family":"Schroeder","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schulte","sequence":"additional","affiliation":[]},{"given":"H-C.","family":"Schultz-Coulon","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schumacher","sequence":"additional","affiliation":[]},{"given":"B. A.","family":"Schumm","sequence":"additional","affiliation":[]},{"given":"Ph.","family":"Schune","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schwartzman","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Schwarz","sequence":"additional","affiliation":[]},{"given":"Ph.","family":"Schwemling","sequence":"additional","affiliation":[]},{"given":"R.","family":"Schwienhorst","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sciandra","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sciolla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Scodeggio","sequence":"additional","affiliation":[]},{"given":"M.","family":"Scornajenghi","sequence":"additional","affiliation":[]},{"given":"F.","family":"Scuri","sequence":"additional","affiliation":[]},{"given":"F.","family":"Scutti","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Scyboz","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Sebastiani","sequence":"additional","affiliation":[]},{"given":"P.","family":"Seema","sequence":"additional","affiliation":[]},{"given":"S. C.","family":"Seidel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Seiden","sequence":"additional","affiliation":[]},{"given":"B. D.","family":"Seidlitz","sequence":"additional","affiliation":[]},{"given":"T.","family":"Seiss","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Seixas","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sekhniaidze","sequence":"additional","affiliation":[]},{"given":"S. 
J.","family":"Sekula","sequence":"additional","affiliation":[]},{"given":"N.","family":"Semprini-Cesari","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Serfon","sequence":"additional","affiliation":[]},{"given":"L.","family":"Serin","sequence":"additional","affiliation":[]},{"given":"L.","family":"Serkin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sessa","sequence":"additional","affiliation":[]},{"given":"H.","family":"Severini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sevova","sequence":"additional","affiliation":[]},{"given":"T.","family":"\u0160filigoj","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sforza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sfyrla","sequence":"additional","affiliation":[]},{"given":"E.","family":"Shabalina","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Shahinian","sequence":"additional","affiliation":[]},{"given":"N. W.","family":"Shaikh","sequence":"additional","affiliation":[]},{"given":"D.","family":"Shaked Renous","sequence":"additional","affiliation":[]},{"given":"L. Y.","family":"Shan","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Shank","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shapiro","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"P. B.","family":"Shatalov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Shaw","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Shaw","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shehade","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Shen","sequence":"additional","affiliation":[]},{"given":"A. D.","family":"Sherman","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sherwood","sequence":"additional","affiliation":[]},{"given":"L.","family":"Shi","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shimizu","sequence":"additional","affiliation":[]},{"given":"C. O.","family":"Shimmin","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Shimogama","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shimojima","sequence":"additional","affiliation":[]},{"given":"I. P. J.","family":"Shipsey","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shirabe","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shiyakova","sequence":"additional","affiliation":[]},{"given":"J.","family":"Shlomi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Shmeleva","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Shochet","sequence":"additional","affiliation":[]},{"given":"J.","family":"Shojaii","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Shope","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shrestha","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Shrif","sequence":"additional","affiliation":[]},{"given":"E.","family":"Shulga","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sicho","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Sickles","sequence":"additional","affiliation":[]},{"given":"P. 
E.","family":"Sidebo","sequence":"additional","affiliation":[]},{"given":"E.","family":"Sideras Haddad","sequence":"additional","affiliation":[]},{"given":"O.","family":"Sidiropoulou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sidoti","sequence":"additional","affiliation":[]},{"given":"F.","family":"Siegert","sequence":"additional","affiliation":[]},{"given":"Dj.","family":"Sijacki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Silva","sequence":"additional","affiliation":[]},{"given":"M. V.","family":"Silva Oliveira","sequence":"additional","affiliation":[]},{"given":"S. B.","family":"Silverstein","sequence":"additional","affiliation":[]},{"given":"S.","family":"Simion","sequence":"additional","affiliation":[]},{"given":"R.","family":"Simoniello","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Simpson-allsop","sequence":"additional","affiliation":[]},{"given":"S.","family":"Simsek","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sinervo","sequence":"additional","affiliation":[]},{"given":"V.","family":"Sinetckii","sequence":"additional","affiliation":[]},{"given":"S.","family":"Singh","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sioli","sequence":"additional","affiliation":[]},{"given":"I.","family":"Siral","sequence":"additional","affiliation":[]},{"given":"S. Yu.","family":"Sivoklokov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Sj\u00f6lin","sequence":"additional","affiliation":[]},{"given":"E.","family":"Skorda","sequence":"additional","affiliation":[]},{"given":"P.","family":"Skubic","sequence":"additional","affiliation":[]},{"given":"M.","family":"Slawinska","sequence":"additional","affiliation":[]},{"given":"K.","family":"Sliwa","sequence":"additional","affiliation":[]},{"given":"R.","family":"Slovak","sequence":"additional","affiliation":[]},{"given":"V.","family":"Smakhtin","sequence":"additional","affiliation":[]},{"given":"B. H.","family":"Smart","sequence":"additional","affiliation":[]},{"given":"J.","family":"Smiesko","sequence":"additional","affiliation":[]},{"given":"N.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"S. Yu.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"L. N.","family":"Smirnova","sequence":"additional","affiliation":[]},{"given":"O.","family":"Smirnova","sequence":"additional","affiliation":[]},{"given":"J. W.","family":"Smith","sequence":"additional","affiliation":[]},{"given":"M.","family":"Smizanska","sequence":"additional","affiliation":[]},{"given":"K.","family":"Smolek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Smykiewicz","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Snesarev","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Snoek","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Snyder","sequence":"additional","affiliation":[]},{"given":"S.","family":"Snyder","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sobie","sequence":"additional","affiliation":[]},{"given":"A.","family":"Soffer","sequence":"additional","affiliation":[]},{"given":"A.","family":"S\u00f8gaard","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sohns","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Solans Sanchez","sequence":"additional","affiliation":[]},{"given":"E. 
Yu.","family":"Soldatov","sequence":"additional","affiliation":[]},{"given":"U.","family":"Soldevila","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Solodkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Soloshenko","sequence":"additional","affiliation":[]},{"given":"O. V.","family":"Solovyanov","sequence":"additional","affiliation":[]},{"given":"V.","family":"Solovyev","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sommer","sequence":"additional","affiliation":[]},{"given":"H.","family":"Son","sequence":"additional","affiliation":[]},{"given":"W.","family":"Song","sequence":"additional","affiliation":[]},{"given":"W. Y.","family":"Song","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sopczak","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Sopio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sopkova","sequence":"additional","affiliation":[]},{"given":"C. L.","family":"Sotiropoulou","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sottocornola","sequence":"additional","affiliation":[]},{"given":"R.","family":"Soualah","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Soukharev","sequence":"additional","affiliation":[]},{"given":"D.","family":"South","sequence":"additional","affiliation":[]},{"given":"S.","family":"Spagnolo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spalla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spangenberg","sequence":"additional","affiliation":[]},{"given":"F.","family":"Span\u00f2","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sperlich","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Spieker","sequence":"additional","affiliation":[]},{"given":"G.","family":"Spigo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spina","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Spiteri","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spousta","sequence":"additional","affiliation":[]},{"given":"A.","family":"Stabile","sequence":"additional","affiliation":[]},{"given":"R.","family":"Stamen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stamenkovic","sequence":"additional","affiliation":[]},{"given":"E.","family":"Stanecka","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stanislaus","sequence":"additional","affiliation":[]},{"given":"M. M.","family":"Stanitzki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stankaityte","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stapf","sequence":"additional","affiliation":[]},{"given":"E. A.","family":"Starchenko","sequence":"additional","affiliation":[]},{"given":"G. H.","family":"Stark","sequence":"additional","affiliation":[]},{"given":"J.","family":"Stark","sequence":"additional","affiliation":[]},{"given":"P.","family":"Staroba","sequence":"additional","affiliation":[]},{"given":"P.","family":"Starovoitov","sequence":"additional","affiliation":[]},{"given":"S.","family":"St\u00e4rz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Staszewski","sequence":"additional","affiliation":[]},{"given":"G.","family":"Stavropoulos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stegler","sequence":"additional","affiliation":[]},{"given":"P.","family":"Steinberg","sequence":"additional","affiliation":[]},{"given":"A. 
L.","family":"Steinhebel","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stelzer","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Stelzer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Stelzer-Chilton","sequence":"additional","affiliation":[]},{"given":"H.","family":"Stenzel","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Stevenson","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Stewart","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Stockton","sequence":"additional","affiliation":[]},{"given":"G.","family":"Stoicea","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stolarski","sequence":"additional","affiliation":[]},{"given":"S.","family":"Stonjek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Straessner","sequence":"additional","affiliation":[]},{"given":"J.","family":"Strandberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Strandberg","sequence":"additional","affiliation":[]},{"given":"M.","family":"Strauss","sequence":"additional","affiliation":[]},{"given":"P.","family":"Strizenec","sequence":"additional","affiliation":[]},{"given":"R.","family":"Str\u00f6hmer","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Strom","sequence":"additional","affiliation":[]},{"given":"R.","family":"Stroynowski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Strubig","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Stucci","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stugu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Stupak","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Styles","sequence":"additional","affiliation":[]},{"given":"D.","family":"Su","sequence":"additional","affiliation":[]},{"given":"W.","family":"Su","sequence":"additional","affiliation":[]},{"given":"S.","family":"Suchek","sequence":"additional","affiliation":[]},{"given":"V. V.","family":"Sulin","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Sullivan","sequence":"additional","affiliation":[]},{"given":"D. M. S.","family":"Sultan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sultansoy","sequence":"additional","affiliation":[]},{"given":"T.","family":"Sumida","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sun","sequence":"additional","affiliation":[]},{"given":"X.","family":"Sun","sequence":"additional","affiliation":[]},{"given":"K.","family":"Suruliz","sequence":"additional","affiliation":[]},{"given":"C. J. E.","family":"Suster","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Sutton","sequence":"additional","affiliation":[]},{"given":"S.","family":"Suzuki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Svatos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Swiatlowski","sequence":"additional","affiliation":[]},{"given":"S. 
P.","family":"Swift","sequence":"additional","affiliation":[]},{"given":"T.","family":"Swirski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sydorenko","sequence":"additional","affiliation":[]},{"given":"I.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"T.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ta","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tackmann","sequence":"additional","affiliation":[]},{"given":"J.","family":"Taenzer","sequence":"additional","affiliation":[]},{"given":"A.","family":"Taffard","sequence":"additional","affiliation":[]},{"given":"R.","family":"Tafirout","sequence":"additional","affiliation":[]},{"given":"H.","family":"Takai","sequence":"additional","affiliation":[]},{"given":"R.","family":"Takashima","sequence":"additional","affiliation":[]},{"given":"K.","family":"Takeda","sequence":"additional","affiliation":[]},{"given":"T.","family":"Takeshita","sequence":"additional","affiliation":[]},{"given":"E. P.","family":"Takeva","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Takubo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Talby","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Talyshev","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"Tamir","sequence":"additional","affiliation":[]},{"given":"J.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"R.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tapia Araya","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tapprogge","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tarek Abouelfadl Mohamed","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tarem","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tariq","sequence":"additional","affiliation":[]},{"given":"G.","family":"Tarna","sequence":"additional","affiliation":[]},{"given":"G. F.","family":"Tartarelli","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tasevsky","sequence":"additional","affiliation":[]},{"given":"T.","family":"Tashiro","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tassi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tavares Delgado","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Tayalati","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"G. N.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"W.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Tee","sequence":"additional","affiliation":[]},{"given":"R.","family":"Teixeira De Lima","sequence":"additional","affiliation":[]},{"given":"P.","family":"Teixeira-Dias","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ten Kate","sequence":"additional","affiliation":[]},{"given":"J. 
J.","family":"Teoh","sequence":"additional","affiliation":[]},{"given":"S.","family":"Terada","sequence":"additional","affiliation":[]},{"given":"K.","family":"Terashi","sequence":"additional","affiliation":[]},{"given":"J.","family":"Terron","sequence":"additional","affiliation":[]},{"given":"S.","family":"Terzo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Testa","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Teuscher","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Thais","sequence":"additional","affiliation":[]},{"given":"T.","family":"Theveneaux-Pelzer","sequence":"additional","affiliation":[]},{"given":"F.","family":"Thiele","sequence":"additional","affiliation":[]},{"given":"D. W.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"J. O.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"P. D.","family":"Thompson","sequence":"additional","affiliation":[]},{"given":"L. A.","family":"Thomsen","sequence":"additional","affiliation":[]},{"given":"E.","family":"Thomson","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Thorpe","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Ticse Torres","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tikhomirov","sequence":"additional","affiliation":[]},{"given":"Yu. A.","family":"Tikhonov","sequence":"additional","affiliation":[]},{"given":"S.","family":"Timoshenko","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tipton","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tisserant","sequence":"additional","affiliation":[]},{"given":"K.","family":"Todome","sequence":"additional","affiliation":[]},{"given":"S.","family":"Todorova-Nova","sequence":"additional","affiliation":[]},{"given":"S.","family":"Todt","sequence":"additional","affiliation":[]},{"given":"J.","family":"Tojo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tok\u00e1r","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tokushuku","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tolley","sequence":"additional","affiliation":[]},{"given":"K. G.","family":"Tomiwa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tomoto","sequence":"additional","affiliation":[]},{"given":"L.","family":"Tompkins","sequence":"additional","affiliation":[]},{"given":"B.","family":"Tong","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tornambe","sequence":"additional","affiliation":[]},{"given":"E.","family":"Torrence","sequence":"additional","affiliation":[]},{"given":"H.","family":"Torres","sequence":"additional","affiliation":[]},{"given":"E.","family":"Torr\u00f3 Pastor","sequence":"additional","affiliation":[]},{"given":"C.","family":"Tosciri","sequence":"additional","affiliation":[]},{"given":"J.","family":"Toth","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Tovey","sequence":"additional","affiliation":[]},{"given":"A.","family":"Traeet","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Treado","sequence":"additional","affiliation":[]},{"given":"T.","family":"Trefzger","sequence":"additional","affiliation":[]},{"given":"F.","family":"Tresoldi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tricoli","sequence":"additional","affiliation":[]},{"given":"I. 
M.","family":"Trigger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Trincaz-Duvoid","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Trischuk","sequence":"additional","affiliation":[]},{"given":"W.","family":"Trischuk","sequence":"additional","affiliation":[]},{"given":"B.","family":"Trocm\u00e9","sequence":"additional","affiliation":[]},{"given":"A.","family":"Trofymov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Troncon","sequence":"additional","affiliation":[]},{"given":"F.","family":"Trovato","sequence":"additional","affiliation":[]},{"given":"L.","family":"Truong","sequence":"additional","affiliation":[]},{"given":"M.","family":"Trzebinski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Trzupek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Tsai","sequence":"additional","affiliation":[]},{"given":"J. C-L.","family":"Tseng","sequence":"additional","affiliation":[]},{"given":"P. V.","family":"Tsiareshka","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tsirigotis","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tsiskaridze","sequence":"additional","affiliation":[]},{"given":"E. G.","family":"Tskhadadze","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tsopoulou","sequence":"additional","affiliation":[]},{"given":"I. I.","family":"Tsukerman","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tsulaia","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tsuno","sequence":"additional","affiliation":[]},{"given":"D.","family":"Tsybychev","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Tu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tudorache","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tudorache","sequence":"additional","affiliation":[]},{"given":"T. T.","family":"Tulbure","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Tuna","sequence":"additional","affiliation":[]},{"given":"S.","family":"Turchikhin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Turgeman","sequence":"additional","affiliation":[]},{"given":"I. Turk","family":"Cakir","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Turner","sequence":"additional","affiliation":[]},{"given":"R.","family":"Turra","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Tuts","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tzamarias","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tzovara","sequence":"additional","affiliation":[]},{"given":"G.","family":"Ucchielli","sequence":"additional","affiliation":[]},{"given":"K.","family":"Uchida","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ukegawa","sequence":"additional","affiliation":[]},{"given":"G.","family":"Unal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Undrus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Unel","sequence":"additional","affiliation":[]},{"given":"F. 
C.","family":"Ungaro","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Unno","sequence":"additional","affiliation":[]},{"given":"K.","family":"Uno","sequence":"additional","affiliation":[]},{"given":"J.","family":"Urban","sequence":"additional","affiliation":[]},{"given":"P.","family":"Urquijo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Usai","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Uysal","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vacek","sequence":"additional","affiliation":[]},{"given":"B.","family":"Vachon","sequence":"additional","affiliation":[]},{"given":"K. O. H.","family":"Vadla","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vaidya","sequence":"additional","affiliation":[]},{"given":"C.","family":"Valderanis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Valdes Santurio","sequence":"additional","affiliation":[]},{"given":"M.","family":"Valente","sequence":"additional","affiliation":[]},{"given":"S.","family":"Valentinetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Valero","sequence":"additional","affiliation":[]},{"given":"L.","family":"Val\u00e9ry","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Vallance","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vallier","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Valls Ferrer","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Van Daalen","sequence":"additional","affiliation":[]},{"given":"P.","family":"Van Gemmeren","sequence":"additional","affiliation":[]},{"given":"I.","family":"Van Vulpen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vanadia","sequence":"additional","affiliation":[]},{"given":"W.","family":"Vandelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vandenbroucke","sequence":"additional","affiliation":[]},{"given":"E. R.","family":"Vandewall","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vaniachine","sequence":"additional","affiliation":[]},{"given":"D.","family":"Vannicola","sequence":"additional","affiliation":[]},{"given":"R.","family":"Vari","sequence":"additional","affiliation":[]},{"given":"E. W.","family":"Varnes","sequence":"additional","affiliation":[]},{"given":"C.","family":"Varni","sequence":"additional","affiliation":[]},{"given":"T.","family":"Varol","sequence":"additional","affiliation":[]},{"given":"D.","family":"Varouchas","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Varvell","sequence":"additional","affiliation":[]},{"given":"M. E.","family":"Vasile","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Vasquez","sequence":"additional","affiliation":[]},{"given":"F.","family":"Vazeille","sequence":"additional","affiliation":[]},{"given":"D.","family":"Vazquez Furelos","sequence":"additional","affiliation":[]},{"given":"T.","family":"Vazquez Schroeder","sequence":"additional","affiliation":[]},{"given":"J.","family":"Veatch","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vecchio","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Veen","sequence":"additional","affiliation":[]},{"given":"L. 
M.","family":"Veloce","sequence":"additional","affiliation":[]},{"given":"F.","family":"Veloso","sequence":"additional","affiliation":[]},{"given":"S.","family":"Veneziano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ventura","sequence":"additional","affiliation":[]},{"given":"N.","family":"Venturi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Verbytskyi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vercesi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Verducci","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Vergel Infante","sequence":"additional","affiliation":[]},{"given":"C.","family":"Vergis","sequence":"additional","affiliation":[]},{"given":"W.","family":"Verkerke","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Vermeulen","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Vermeulen","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Vetterli","sequence":"additional","affiliation":[]},{"given":"N.","family":"Viaux Maira","sequence":"additional","affiliation":[]},{"given":"M. Vicente","family":"Barreto Pinto","sequence":"additional","affiliation":[]},{"given":"T.","family":"Vickey","sequence":"additional","affiliation":[]},{"given":"O. E. Vickey","family":"Boeriu","sequence":"additional","affiliation":[]},{"given":"G. H. A.","family":"Viehhauser","sequence":"additional","affiliation":[]},{"given":"L.","family":"Vigani","sequence":"additional","affiliation":[]},{"given":"M.","family":"Villa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Villaplana Perez","sequence":"additional","affiliation":[]},{"given":"E.","family":"Vilucchi","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Vincter","sequence":"additional","affiliation":[]},{"given":"G. S.","family":"Virdee","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vishwakarma","sequence":"additional","affiliation":[]},{"given":"C.","family":"Vittori","sequence":"additional","affiliation":[]},{"given":"I.","family":"Vivarelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vogel","sequence":"additional","affiliation":[]},{"given":"P.","family":"Vokac","sequence":"additional","affiliation":[]},{"given":"S. E.","family":"von Buddenbrock","sequence":"additional","affiliation":[]},{"given":"E.","family":"Von Toerne","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vorobel","sequence":"additional","affiliation":[]},{"given":"K.","family":"Vorobev","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vos","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Vossebeld","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vozak","sequence":"additional","affiliation":[]},{"given":"N.","family":"Vranjes","sequence":"additional","affiliation":[]},{"given":"M. 
Vranjes","family":"Milosavljevic","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vrba","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vreeswijk","sequence":"additional","affiliation":[]},{"given":"R.","family":"Vuillermet","sequence":"additional","affiliation":[]},{"given":"I.","family":"Vukotic","sequence":"additional","affiliation":[]},{"given":"P.","family":"Wagner","sequence":"additional","affiliation":[]},{"given":"W.","family":"Wagner","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wagner-Kuhr","sequence":"additional","affiliation":[]},{"given":"S.","family":"Wahdan","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wahlberg","sequence":"additional","affiliation":[]},{"given":"V. M.","family":"Walbrecht","sequence":"additional","affiliation":[]},{"given":"J.","family":"Walder","sequence":"additional","affiliation":[]},{"given":"R.","family":"Walker","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Walker","sequence":"additional","affiliation":[]},{"given":"W.","family":"Walkowiak","sequence":"additional","affiliation":[]},{"given":"V.","family":"Wallangen","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"A. Z.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"F.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"P.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.-J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"W. T.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"W. X.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wanotayaroj","sequence":"additional","affiliation":[]},{"given":"A.","family":"Warburton","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Ward","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Wardrope","sequence":"additional","affiliation":[]},{"given":"N.","family":"Warrack","sequence":"additional","affiliation":[]},{"given":"A.","family":"Washbrook","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Watson","sequence":"additional","affiliation":[]},{"given":"M. F.","family":"Watson","sequence":"additional","affiliation":[]},{"given":"G.","family":"Watts","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Waugh","sequence":"additional","affiliation":[]},{"given":"A. F.","family":"Webb","sequence":"additional","affiliation":[]},{"given":"C.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"M. S.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"S. 
A.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Weidberg","sequence":"additional","affiliation":[]},{"given":"J.","family":"Weingarten","sequence":"additional","affiliation":[]},{"given":"M.","family":"Weirich","sequence":"additional","affiliation":[]},{"given":"C.","family":"Weiser","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Wells","sequence":"additional","affiliation":[]},{"given":"T.","family":"Wenaus","sequence":"additional","affiliation":[]},{"given":"T.","family":"Wengler","sequence":"additional","affiliation":[]},{"given":"S.","family":"Wenig","sequence":"additional","affiliation":[]},{"given":"N.","family":"Wermes","sequence":"additional","affiliation":[]},{"given":"M. D.","family":"Werner","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wessels","sequence":"additional","affiliation":[]},{"given":"T. D.","family":"Weston","sequence":"additional","affiliation":[]},{"given":"K.","family":"Whalen","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Whallon","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Wharton","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"White","sequence":"additional","affiliation":[]},{"given":"A.","family":"White","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"White","sequence":"additional","affiliation":[]},{"given":"D.","family":"Whiteson","sequence":"additional","affiliation":[]},{"given":"B. W.","family":"Whitmore","sequence":"additional","affiliation":[]},{"given":"W.","family":"Wiedenmann","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wiel","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wielers","sequence":"additional","affiliation":[]},{"given":"N.","family":"Wieseotte","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wiglesworth","sequence":"additional","affiliation":[]},{"given":"L. A. M.","family":"Wiik-Fuchs","sequence":"additional","affiliation":[]},{"given":"H. G.","family":"Wilkens","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Wilkins","sequence":"additional","affiliation":[]},{"given":"H. H.","family":"Williams","sequence":"additional","affiliation":[]},{"given":"S.","family":"Williams","sequence":"additional","affiliation":[]},{"given":"C.","family":"Willis","sequence":"additional","affiliation":[]},{"given":"S.","family":"Willocq","sequence":"additional","affiliation":[]},{"given":"I.","family":"Wingerter-Seez","sequence":"additional","affiliation":[]},{"given":"E.","family":"Winkels","sequence":"additional","affiliation":[]},{"given":"F.","family":"Winklmeier","sequence":"additional","affiliation":[]},{"given":"O. J.","family":"Winston","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"Winter","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wittgen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wobisch","sequence":"additional","affiliation":[]},{"given":"A.","family":"Wolf","sequence":"additional","affiliation":[]},{"given":"T. M. H.","family":"Wolf","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wolff","sequence":"additional","affiliation":[]},{"given":"R.","family":"W\u00f6lker","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wollrath","sequence":"additional","affiliation":[]},{"given":"M. 
W.","family":"Wolter","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wolters","sequence":"additional","affiliation":[]},{"given":"V. W. S.","family":"Wong","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Woods","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Worm","sequence":"additional","affiliation":[]},{"given":"B. K.","family":"Wosiek","sequence":"additional","affiliation":[]},{"given":"K. W.","family":"Wo\u017aniak","sequence":"additional","affiliation":[]},{"given":"K.","family":"Wraight","sequence":"additional","affiliation":[]},{"given":"S. L.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Wyatt","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Wynne","sequence":"additional","affiliation":[]},{"given":"S.","family":"Xella","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xi","sequence":"additional","affiliation":[]},{"given":"X.","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"I.","family":"Xiotidis","sequence":"additional","affiliation":[]},{"given":"D.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"L.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"T.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"W.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"B.","family":"Yabsley","sequence":"additional","affiliation":[]},{"given":"S.","family":"Yacoob","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yajima","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Yallup","sequence":"additional","affiliation":[]},{"given":"N.","family":"Yamaguchi","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yamaguchi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Yamamoto","sequence":"additional","affiliation":[]},{"given":"M.","family":"Yamatani","sequence":"additional","affiliation":[]},{"given":"T.","family":"Yamazaki","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yamazaki","sequence":"additional","affiliation":[]},{"given":"J.","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Yan","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"H. T.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"T.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"W-M.","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Y. 
C.","family":"Yap","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yasu","sequence":"additional","affiliation":[]},{"given":"E.","family":"Yatsenko","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"I.","family":"Yeletskikh","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Yexley","sequence":"additional","affiliation":[]},{"given":"E.","family":"Yigitbasi","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yorita","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yoshihara","sequence":"additional","affiliation":[]},{"given":"C. J. S.","family":"Young","sequence":"additional","affiliation":[]},{"given":"C.","family":"Young","sequence":"additional","affiliation":[]},{"given":"J.","family":"Yu","sequence":"additional","affiliation":[]},{"given":"R.","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"X.","family":"Yue","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zaazoua","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zabinski","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zacharis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Zaffaroni","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Zaitsev","sequence":"additional","affiliation":[]},{"given":"T.","family":"Zakareishvili","sequence":"additional","affiliation":[]},{"given":"N.","family":"Zakharchuk","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zambito","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zanzi","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Zaripovas","sequence":"additional","affiliation":[]},{"given":"S. V.","family":"Zei\u00dfner","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zeitnitz","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zemaityte","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"O.","family":"Zenin","sequence":"additional","affiliation":[]},{"given":"T.","family":"\u017deni\u0161","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zerwas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zgubi\u010d","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"D. 
F.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"P.","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"A.","family":"Zhemchugov","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zhong","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"M. S.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"N.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"C. G.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhuang","sequence":"additional","affiliation":[]},{"given":"K.","family":"Zhukov","sequence":"additional","affiliation":[]},{"given":"V.","family":"Zhulanov","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zieminska","sequence":"additional","affiliation":[]},{"given":"N. I.","family":"Zimine","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zimmermann","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zinonos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ziolkowski","sequence":"additional","affiliation":[]},{"given":"L.","family":"\u017divkovi\u0107","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zobernig","sequence":"additional","affiliation":[]},{"given":"A.","family":"Zoccoli","sequence":"additional","affiliation":[]},{"given":"K.","family":"Zoch","sequence":"additional","affiliation":[]},{"given":"T. 
G.","family":"Zorbas","sequence":"additional","affiliation":[]},{"given":"R.","family":"Zou","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zwalinski","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,9]]},"reference":[{"key":"1757_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.physletb.2012.08.020","volume":"716","author":"ATLAS Collaboration.","year":"2012","unstructured":"ATLAS Collaboration. Observation of a new particle in the search for the Standard Model Higgs boson with the ATLAS detector at the LHC. Phys. Lett. B 716, 1\u201329 (2012).","journal-title":"Phys. Lett. B"},{"key":"1757_CR2","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1016\/j.physletb.2012.08.021","volume":"716","author":"CMS Collaboration.","year":"2012","unstructured":"CMS Collaboration. Observation of a new boson at a mass of 125\u2009GeV with the CMS experiment at the LHC. Phys. Lett. B 716, 30\u201361 (2012).","journal-title":"Phys. Lett. B"},{"key":"1757_CR3","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1103\/PhysRevLett.38.883","volume":"38","author":"BW Lee","year":"1977","unstructured":"Lee, B. W., Quigg, C. & Thacker, H. B. Strength of weak interactions at very high energies and the Higgs boson mass. Phys. Rev. Lett. 38, 883\u2013885 (1977).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR4","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1016\/0550-3213(85)90580-2","volume":"261","author":"MS Chanowitz","year":"1985","unstructured":"Chanowitz, M. S. & Gaillard, M. K. The TeV physics of strongly interacting W\u2019s and Z\u2019s. Nucl. Phys. B 261, 379\u2013431 (1985).","journal-title":"Nucl. Phys. B"},{"key":"1757_CR5","unstructured":"Szleper, M. The Higgs boson and the physics of WW scattering before and after Higgs discovery. Preprint at https:\/\/arxiv.org\/abs\/1412.8367 (2014)."},{"key":"1757_CR6","doi-asserted-by":"publisher","first-page":"081","DOI":"10.1007\/JHEP10(2011)081","volume":"10","author":"R Contino","year":"2011","unstructured":"Contino, R., Pappadopulo, D., Marzocca, D. & Rattazzi, R. On the effect of resonances in composite Higgs phenomenology. J. High Energy Phys. 10, 081 (2011).","journal-title":"J. High Energy Phys."},{"key":"1757_CR7","doi-asserted-by":"publisher","first-page":"034","DOI":"10.1088\/1126-6708\/2002\/07\/034","volume":"07","author":"N Arkani-Hamed","year":"2002","unstructured":"Arkani-Hamed, N., Cohen, A. G., Katz, E. & Nelson, A. E. The littlest Higgs. J. High Energy Phys. 07, 034 (2002).","journal-title":"J. High Energy Phys."},{"key":"1757_CR8","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-013-2704-3","volume":"74","author":"A Djouadi","year":"2014","unstructured":"Djouadi, A. Implications of the Higgs discovery for the MSSM. Eur. Phys. J. C 74, 2704 (2014).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR9","doi-asserted-by":"publisher","first-page":"073005","DOI":"10.1103\/PhysRevD.74.073005","volume":"74","author":"OJP Eboli","year":"2006","unstructured":"Eboli, O. J. P., Gonzalez-Garcia, M. C. & Mizukoshi, J. K. pp\u2009\u2192\u2009jje\u00b1\u03bc\u00b1\u03bd\u03bd and jje\u00b1\u03bc\u2213\u03bd\u03bd at $$O({\\alpha }_{em}^{6})$$ and $$O({\\alpha }_{em}^{6})$$ for the study of the quartic electroweak gauge boson vertex at CERN LHC. Phys. Rev. D 74, 073005 (2006).","journal-title":"Phys. Rev. 
D"},{"key":"1757_CR10","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/JHEP05(2022)039","volume":"2022","author":"R Bellan","year":"2022","unstructured":"Bellan, R. et al. A sensitivity study of VBS and diboson WW to dimension-6 EFT operators at the LHC. J. High Energy Phys. 2022, 39 (2022).","journal-title":"J. High Energy Phys."},{"key":"1757_CR11","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1140\/epjc\/s10052-019-6893-2","volume":"79","author":"R Gomez-Ambrosio","year":"2019","unstructured":"Gomez-Ambrosio, R. Studies of dimension-six EFT effects in vector boson scattering. Eur. Phys. J. C 79, 389 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR12","doi-asserted-by":"publisher","first-page":"161801","DOI":"10.1103\/PhysRevLett.123.161801","volume":"123","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Observation of electroweak production of a same-sign W boson pair in association with two jets in pp collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. Lett. 123, 161801 (2019).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR13","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.physletb.2019.05.012","volume":"793","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Observation of electroweak W\u00b1Z boson pair production in association with two jets in pp collisions at $$\\sqrt{s}=$$ 13 TeV with the ATLAS detector. Phys. Lett. B 793, 469\u2013492 (2019).","journal-title":"Phys. Lett. B"},{"key":"1757_CR14","doi-asserted-by":"publisher","first-page":"081801","DOI":"10.1103\/PhysRevLett.120.081801","volume":"120","author":"CMS Collaboration.","year":"2018","unstructured":"CMS Collaboration. Observation of electroweak production of same-sign W boson pairs in the two jet and two same-sign lepton final state in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Rev. Lett. 120, 081801 (2018).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR15","unstructured":"CMS Collaboration. Measurements of production cross sections of WZ and same-sign WW boson pairs in association with two jets in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Lett. B 809, 135710 (2020)."},{"key":"1757_CR16","doi-asserted-by":"publisher","first-page":"135992","DOI":"10.1016\/j.physletb.2020.135992","volume":"812","author":"CMS Collaboration.","year":"2021","unstructured":"CMS Collaboration. Evidence for electroweak production of four charged leptons and two jets in proton\u2013proton collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$. Phys. Lett. B 812, 135992 (2021).","journal-title":"Phys. Lett. B"},{"key":"1757_CR17","doi-asserted-by":"publisher","first-page":"053003","DOI":"10.1103\/PhysRevD.90.053003","volume":"90","author":"C Englert","year":"2014","unstructured":"Englert, C. & Spannowsky, M. Limitations and opportunities of off-shell coupling measurements. Phys. Rev. D 90, 053003 (2014).","journal-title":"Phys. Rev. D"},{"key":"1757_CR18","unstructured":"ATLAS Collaboration. The ATLAS experiment at the CERN Large Hadron Collider. J. Instrum. 3, S08003 (2008)."},{"key":"1757_CR19","unstructured":"ATLAS Collaboration. ATLAS insertable B-layer technical design report, ATLAS-TDR-19. CERN https:\/\/cds.cern.ch\/record\/1291633 (2010)."},{"key":"1757_CR20","doi-asserted-by":"publisher","first-page":"T05008","DOI":"10.1088\/1748-0221\/13\/05\/T05008","volume":"13","author":"B Abbott","year":"2018","unstructured":"Abbott, B. et al. 
Production and integration of the ATLAS Insertable B-Layer. J. Instrum. 13, T05008 (2018).","journal-title":"J. Instrum."},{"key":"1757_CR21","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-017-4852-3","volume":"77","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Performance of the ATLAS trigger system in 2015. Eur. Phys. J. C 77, 317 (2017).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR22","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/JHEP03(2014)141","volume":"2014","author":"B J\u00e4ger","year":"2014","unstructured":"J\u00e4ger, B., Karlberg, A. & Zanderighi, G. Electroweak ZZjj production in the Standard Model and beyond in the POWHEG-BOX V2. J. High Energy Phys. 2014, 141 (2014).","journal-title":"J. High Energy Phys."},{"key":"1757_CR23","doi-asserted-by":"publisher","first-page":"040","DOI":"10.1007\/JHEP04(2015)040","volume":"04","author":"RD Ball","year":"2015","unstructured":"Ball, R. D. et al. Parton distributions for the LHC run II. J. High Energy Phys. 04, 040 (2015).","journal-title":"J. High Energy Phys."},{"key":"1757_CR24","doi-asserted-by":"publisher","first-page":"079","DOI":"10.1007\/JHEP07(2014)079","volume":"07","author":"J Alwall","year":"2014","unstructured":"Alwall, J. et al. The automated computation of tree-level and next-to-leading order differential cross sections, and their matching to parton shower simulations. J. High Energy Phys. 07, 079 (2014).","journal-title":"J. High Energy Phys."},{"key":"1757_CR25","doi-asserted-by":"publisher","first-page":"007","DOI":"10.1088\/1126-6708\/2009\/02\/007","volume":"02","author":"T Gleisberg","year":"2009","unstructured":"Gleisberg, T. et al. Event generation with SHERPA 1.1. J. High Energy Phys. 02, 007 (2009).","journal-title":"J. High Energy Phys."},{"key":"1757_CR26","doi-asserted-by":"publisher","first-page":"082","DOI":"10.1007\/JHEP12(2013)082","volume":"12","author":"N Kauer","year":"2013","unstructured":"Kauer, N. Interference effects for H\u2009\u2192\u2009WW\/ZZ $$\\to \\ell {\\overline{\\nu }}_{\\ell }\\overline{\\ell }{\\nu }_{\\ell }$$ searches in gluon fusion at the LHC. J. High Energy Phys. 12, 082, (2013).","journal-title":"J. High Energy Phys."},{"key":"1757_CR27","doi-asserted-by":"publisher","first-page":"033009","DOI":"10.1103\/PhysRevD.89.033009","volume":"89","author":"J Gao","year":"2014","unstructured":"Gao, J. et al. CT10 next-to-next-to-leading order global analysis of QCD. Phys. Rev. D 89, 033009 (2014).","journal-title":"Phys. Rev. D"},{"key":"1757_CR28","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1088\/1126-6708\/2007\/09\/126","volume":"09","author":"S Frixione","year":"2007","unstructured":"Frixione, S., Ridolfi, G. & Nason, P. A positive-weight next-to-leading-order Monte Carlo for heavy flavour hadroproduction. J. High Energy Phys. 09, 126 (2007).","journal-title":"J. High Energy Phys."},{"key":"1757_CR29","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1088\/1126-6708\/2009\/09\/111","volume":"09","author":"S Alioli","year":"2009","unstructured":"Alioli, S., Nason, P., Oleari, C. & Re, E. NLO single-top production matched with shower in POWHEG: s- and t-channel contributions. J. High Energy Phys. 09, 111 (2009).","journal-title":"J. High Energy Phys."},{"key":"1757_CR30","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1007\/JHEP09(2012)130","volume":"09","author":"R Frederix","year":"2012","unstructured":"Frederix, R., Re, E. & Torrielli, P. 
Single-top t-channel hadroproduction in the four-flavour scheme with POWHEG and aMC@NLO. J. High Energy Phys. 09, 130 (2012).","journal-title":"J. High Energy Phys."},{"key":"1757_CR31","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-011-1547-z","volume":"71","author":"E Re","year":"2011","unstructured":"Re, E. Single-top Wt-channel production matched with parton showers using the POWHEG method. Eur. Phys. J. C 71, 1547 (2011).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR32","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1016\/j.cpc.2008.01.036","volume":"178","author":"T Sj\u00f6strand","year":"2008","unstructured":"Sj\u00f6strand, T., Mrenna, S. & Skands, P. Z. A brief introduction to PYTHIA 8.1. Comput. Phys. Commun. 178, 852\u2013867 (2008).","journal-title":"Comput. Phys. Commun."},{"key":"1757_CR33","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1016\/j.nuclphysb.2012.10.003","volume":"867","author":"RD Ball","year":"2013","unstructured":"Ball, R. D. et al. Parton distributions with LHC data. Nucl. Phys. B 867, 244\u2013289 (2013).","journal-title":"Nucl. Phys. B"},{"key":"1757_CR34","unstructured":"ATLAS Collaboration. ATLAS Pythia 8 tunes to 7\u2009TeV data, ATL-PHYS-PUB-2014-021 CERN https:\/\/cds.cern.ch\/record\/1966419 (2014)."},{"key":"1757_CR35","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-010-1429-9","volume":"70","author":"ATLAS Collaboration.","year":"2010","unstructured":"ATLAS Collaboration. The ATLAS simulation infrastructure. Eur. Phys. J. C 70, 823 (2010).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR36","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1016\/S0168-9002(03)01368-8","volume":"506","author":"S Agostinelli","year":"2003","unstructured":"Agostinelli, S. et al. Geant4\u2014a simulation toolkit. Nucl. Instrum. Methods A 506, 250\u2013303 (2003).","journal-title":"Nucl. Instrum. Methods A"},{"key":"1757_CR37","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s41781-021-00062-2","volume":"6","author":"G Aad","year":"2022","unstructured":"Aad, G. et al. Emulating the impact of additional proton-proton interactions in the ATLAS simulation by presampling sets of inelastic Monte Carlo events. Comput. Softw. Big Sci. 6, 3 (2022).","journal-title":"Comput. Softw. Big Sci."},{"key":"1757_CR38","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4120-y","volume":"76","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Muon reconstruction performance of the ATLAS detector in proton\u2013proton collision data at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$. Eur. Phys. J. C 76, 292 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR39","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-019-7140-6","volume":"79","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Electron reconstruction and identification in the ATLAS experiment using the 2015 and 2016 LHC proton\u2013proton collision data at$$\\sqrt{s}$$ = 13\u2009TeV. Eur. Phys. J. C 79, 639 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR40","doi-asserted-by":"publisher","first-page":"063","DOI":"10.1088\/1126-6708\/2008\/04\/063","volume":"04","author":"M Cacciari","year":"2008","unstructured":"Cacciari, M., Salam, G. P. & Soyez, G. The anti-kt jet clustering algorithm. J. High Energy Phys. 04, 063 (2008).","journal-title":"J. 
High Energy Phys."},{"key":"1757_CR41","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-012-1896-2","volume":"72","author":"M Cacciari","year":"2012","unstructured":"Cacciari, M., Salam, G. P. & Soyez, G. FastJet user manual. Eur. Phys. J. C 72, 1896 (2012).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR42","doi-asserted-by":"publisher","first-page":"072002","DOI":"10.1103\/PhysRevD.96.072002","volume":"96","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Jet energy scale measurements and their systematic uncertainties in proton\u2013proton collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. D 96, 072002 (2017).","journal-title":"Phys. Rev. D"},{"key":"1757_CR43","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4395-z","volume":"76","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Performance of pile-up mitigation techniques for jets in pp collisions at $$\\sqrt{s}=8\\,{{{\\rm{TeV}}}}$$ using the ATLAS detector. Eur. Phys. J. C 76, 581 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR44","doi-asserted-by":"publisher","first-page":"970","DOI":"10.1140\/epjc\/s10052-019-7450-8","volume":"79","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. ATLAS b-jet identification performance and efficiency measurement with $$t\\overline{t}$$ events in pp collisions at $$\\sqrt{s}=13$$ TeV. Eur. Phys. J. C 79, 970 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR45","doi-asserted-by":"publisher","first-page":"032003","DOI":"10.1103\/PhysRevD.94.032003","volume":"94","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Search for pair production of gluinos decaying via stop and sbottom in events with b-jets and large missing transverse momentum in pp collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. D 94, 032003 (2016).","journal-title":"Phys. Rev. D"},{"key":"1757_CR46","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-018-6288-9","volume":"78","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. Performance of missing transverse momentum reconstruction with the ATLAS detector using proton\u2013proton collisions at $$\\sqrt{s}$$ = 13 TeV. Eur. Phys. J. C 78, 903 (2018).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR47","unstructured":"ATLAS Collaboration. Object-based missing transverse momentum significance in the ATLAS detector. ATLAS-CONF-2018-038. CERN https:\/\/cds.cern.ch\/record\/2630948 (2018)."},{"key":"1757_CR48","doi-asserted-by":"publisher","first-page":"135341","DOI":"10.1016\/j.physletb.2020.135341","volume":"803","author":"ATLAS Collaboration.","year":"2020","unstructured":"ATLAS Collaboration. Evidence for electroweak production of two jets in association with a em>Z\u03b3 pair in pp collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$ with the ATLAS detector. Phys. Lett. B 803, 135341 (2020).","journal-title":"Phys. Lett. B"},{"key":"1757_CR49","first-page":"031","volume":"04","author":"ATLAS Collaboration.","year":"2014","unstructured":"ATLAS Collaboration. Measurement of the electroweak production of dijets in association with a Z-boson and distributions sensitive to vector boson fusion in proton\u2013proton collisions at $$\\sqrt{s}=8\\,{{{\\rm{TeV}}}}$$ using the ATLAS detector. J. High Energy Phys. 04, 031 (2014).","journal-title":"J. 
High Energy Phys."},{"key":"1757_CR50","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-017-5007-2","volume":"77","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Measurements of electroweak Wjj production and constraints on anomalous gauge couplings with the ATLAS detector. Eur. Phys. J. C 77, 474 (2017).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR51","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1016\/j.physletb.2017.10.040","volume":"775","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Measurement of the cross-section for electroweak production of dijets in association with a Z boson in pp collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$ with the ATLAS detector. Phys. Lett. B 775, 206\u2013228 (2017).","journal-title":"Phys. Lett. B"},{"key":"1757_CR52","doi-asserted-by":"publisher","first-page":"032005","DOI":"10.1103\/PhysRevD.97.032005","volume":"97","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. $$ZZ\\to {\\ell }^{+}{\\ell }^{-}{\\ell }^{{\\,}^{\\prime} +}{\\ell }^{{\\,}^{\\prime} -}$$ cross-section measurements and search for anomalous triple gauge couplings in 13\u2009TeV pp collisions with the ATLAS detector. Phys. Rev. D 97, 032005 (2018).","journal-title":"Phys. Rev. D"},{"key":"1757_CR53","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1016\/j.physletb.2017.11.049","volume":"776","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. Search for an invisibly decaying Higgs boson or dark matter candidates produced in association with a Z boson in pp collisions at $$\\sqrt{s}=$$ 13\u2009TeV with the ATLAS detector. Phys. Lett. B 776, 318\u2013337 (2018).","journal-title":"Phys. Lett. B"},{"key":"1757_CR54","unstructured":"ATLAS Collaboration. Luminosity determination in pp collisions at $$\\sqrt{s}=13$$ TeV using the ATLAS detector at the LHC, ATLAS-CONF-2019-021. CERN http:\/\/cds.cern.ch\/record\/2677054 (2019)."},{"key":"1757_CR55","doi-asserted-by":"publisher","first-page":"P07017","DOI":"10.1088\/1748-0221\/13\/07\/P07017","volume":"13","author":"G Avoni","year":"2018","unstructured":"Avoni, G. et al. The new lucid-2 detector for luminosity measurement and monitoring in atlas. J. Instrum. 13, P07017 (2018).","journal-title":"J. Instrum."},{"key":"1757_CR56","doi-asserted-by":"publisher","first-page":"023001","DOI":"10.1088\/0954-3899\/43\/2\/023001","volume":"43","author":"J Butterworth","year":"2016","unstructured":"Butterworth, J. et al. PDF4LHC recommendations for LHC Run II. J. Phys. G 43, 023001 (2016).","journal-title":"J. Phys. G"},{"key":"1757_CR57","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/JHEP10(2019)127","volume":"10","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Measurement of ZZ production in the \u2113\u2113\u03bd\u03bd final state with the ATLAS detector in pp collisions at $$\\sqrt{s}=13$$ TeV. J. High Energy Phys. 10, 127 (2019).","journal-title":"J. High Energy Phys."},{"key":"1757_CR58","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4018-8","volume":"76","author":"J Bellm","year":"2016","unstructured":"Bellm, J. et al. Herwig 7.0\/Herwig++ 3.0 release note. Eur. Phys. J. C 76, 196 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR59","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1140\/epjc\/s10052-008-0798-9","volume":"58","author":"M B\u00e4hr","year":"2008","unstructured":"B\u00e4hr, M. 
et al. Herwig++ physics and manual. Eur. Phys. J. C 58, 639\u2013707 (2008).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR60","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1214\/aos\/1013203451","volume":"29","author":"JH Friedman","year":"2001","unstructured":"Friedman, J. H. Greedy function approximation: A gradient boosting machine. Ann. Stat. 29, 1189\u20131232 (2001).","journal-title":"Ann. Stat."},{"key":"1757_CR61","unstructured":"H\u00f6cker, A. et al. TMVA \u2013 toolkit for multivariate data analysis. Preprint at https:\/\/arxiv.org\/abs\/physics\/0703039 (2007)."},{"key":"1757_CR62","doi-asserted-by":"publisher","first-page":"1554","DOI":"10.1140\/epjc\/s10052-011-1554-0","volume":"71","author":"G Cowan","year":"2011","unstructured":"Cowan, G., Cranmer, K., Gross, E. & Vitells, O. Asymptotic formulae for likelihood-based tests of new physics. Eur. Phys. J. C 71, 1554 (2011).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR63","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.physletb.2017.10.020","volume":"774","author":"CMS Collaboration.","year":"2017","unstructured":"CMS Collaboration. Measurement of vector boson scattering and constraints on anomalous quartic couplings from events with four leptons and two jets in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Lett. B 774, 682\u2013705 (2017).","journal-title":"Phys. Lett. B"},{"key":"1757_CR64","unstructured":"ATLAS Collaboration. ATLAS computing acknowledgements, ATL-SOFT-PUB-2021-003. CERN (2021) https:\/\/cds.cern.ch\/record\/2776662."},{"key":"1757_CR65","doi-asserted-by":"publisher","unstructured":"ATLAS Collaboration. Observation of electroweak production of two jets and a Z-boson pair with the ATLAS detector at the LHC (version 3). 
HEPData https:\/\/doi.org\/10.17182\/hepdata.93015.v3 (2022).","DOI":"10.17182\/hepdata.93015.v3"}],"container-title":["Nature Physics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T15:32:36Z","timestamp":1676043156000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,9]]},"references-count":65,"alternative-id":["1757"],"URL":"http:\/\/dx.doi.org\/10.1038\/s41567-022-01757-y","relation":{},"ISSN":["1745-2473","1745-2481"],"issn-type":[{"value":"1745-2473","type":"print"},{"value":"1745-2481","type":"electronic"}],"subject":["General Physics and Astronomy"],"published":{"date-parts":[[2023,2,9]]},"assertion":[{"value":"27 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 August 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala
index fbf6f72c02..c27cebf65a 100644
--- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala
+++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala
@@ -22,6 +22,13 @@ class CrossrefMappingTest {
   val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
   val mapper = new ObjectMapper()
 
+  @Test
+  def testMissingAuthorParser(): Unit = {
+    val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString
+    val result = Crossref2Oaf.convert(json)
+    result.filter(o => o.isInstanceOf[Publication]).map(p => p.asInstanceOf[Publication]).foreach(p => assertTrue(p.getAuthor.size() > 0))
+  }
+
   @Test
   def testFunderRelationshipsMapping(): Unit = {
     val template = Source
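The test added above guards against a Crossref mapping regression in which a record with a populated author array produced publications with no authors. A quick standalone probe of the fixture's author list, sketched with json4s as used elsewhere in the doiboost module (the object name and the zero fallback are illustrative, not part of the patch):

    import org.json4s._
    import org.json4s.jackson.JsonMethods.parse

    import scala.io.Source

    // Illustrative probe, not part of the patch: count the "author"
    // entries in the bundled Crossref record for s41567-022-01757-y.
    object AuthorProbe {
      def main(args: Array[String]): Unit = {
        val json = Source.fromInputStream(
          getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")
        ).mkString
        val authorCount = parse(json) \ "author" match {
          case JArray(values) => values.size
          case _              => 0 // assumption: treat a missing array as empty
        }
        println(s"author entries in fixture: $authorCount")
      }
    }

Since the record carries the full ATLAS collaboration author list, the probe should report a large count; the JUnit assertion then checks that Crossref2Oaf.convert does not lose it.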
From 078df0b4d1f9afcf254235bc944ed3e26d6235a6 Mon Sep 17 00:00:00 2001
From: Giambattista Bloisi
Date: Fri, 26 Jan 2024 20:19:52 +0100
Subject: [PATCH 48/56] Use SparkSQL in place of Hive for executing
 step16-createIndicatorsTables.sql of stats update wf

---
 dhp-workflows/dhp-stats-update/pom.xml        |   5 +
 .../scripts/step16-createIndicatorsTables.sql | 627 +++++++++---------
 .../dhp/oa/graph/stats/oozie_app/workflow.xml | 199 +++---
 3 files changed, 436 insertions(+), 395 deletions(-)

diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml
index f491b58689..cc15b8a15b 100644
--- a/dhp-workflows/dhp-stats-update/pom.xml
+++ b/dhp-workflows/dhp-stats-update/pom.xml
@@ -8,6 +8,11 @@
     <modelVersion>4.0.0</modelVersion>
     <artifactId>dhp-stats-update</artifactId>
     <dependencies>
+        <dependency>
+            <groupId>eu.dnetlib.dhp</groupId>
+            <artifactId>dhp-common</artifactId>
+            <version>${project.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_${scala.binary.version}</artifactId>
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
index 5aa14e2c26..f13b2500cb 100755
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@@ -1,5 +1,5 @@
 -- Sprint 1 ----
-drop table if exists ${stats_db_name}.indi_pub_green_oa purge;
+drop table if exists ${stats_db_name}.indi_pub_green_oa purge; /*EOS*/
 --create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
 --select distinct p.id, coalesce(green_oa, 0) as green_oa
@@ -24,9 +24,9 @@ from ${stats_db_name}.publication p
 where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp
- on p.id= tmp.id;
+ on p.id= tmp.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_grey_lit purge;
+drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as
 select distinct p.id, coalesce(grey_lit, 0) as grey_lit
@@ -37,9 +37,9 @@ from ${stats_db_name}.publication p
 join ${stats_db_name}.result_classifications rt on rt.id = p.id
 where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object')
 and not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type'
- and rc.id=p.id)) tmp on p.id=tmp.id;
+ and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge;
+drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as
 select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
@@ -48,10 +48,10 @@ from ${stats_db_name}.publication p
 (select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri
 join ${stats_db_name}.datasource d on d.id = ri.collectedfrom
 where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp
- on tmp.id=p.id;
+ on tmp.id=p.id; /*EOS*/
 -- Sprint 2 ----
-drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge;
+drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as
 select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
@@ -59,9 +59,9 @@ from ${stats_db_name}.result r
 left outer join (select r.id, license.type as lic from ${stats_db_name}.result r
 join ${stats_db_name}.result_licenses as license on license.id = r.id
 where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
- on r.id= tmp.id;
+ on r.id= tmp.id; /*EOS*/
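The /*EOS*/ markers appended to every statement are what make the script executable through SparkSQL: unlike the Hive CLI, which consumes a whole ;-separated script, spark.sql() runs one statement at a time. A minimal sketch of the statement-splitting driver this enables, assuming a SparkSession with Hive support (the object and argument handling are illustrative, not the actual job code):

    import org.apache.spark.sql.SparkSession

    import scala.io.Source

    // Sketch: execute a multi-statement HQL script by splitting on the
    // explicit /*EOS*/ end-of-statement markers added by this patch.
    object SqlScriptRunner {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("step16-createIndicatorsTables")
          .enableHiveSupport()
          .getOrCreate()

        val raw = Source.fromFile(args(0)).mkString
        // substitute the Oozie-style template variable used by the script
        val script = raw.replace("${stats_db_name}", args(1))

        script
          .split("""/\*EOS\*/""")
          .map(_.trim)
          .filter(_.nonEmpty) // a real driver would also skip comment-only chunks
          .foreach(stmt => spark.sql(stmt))
      }
    }

Splitting on an explicit marker avoids mis-splitting on any ';' that appears inside comments or quoted strings.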
-drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge;
+drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as
 select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
@@ -70,32 +70,32 @@ from ${stats_db_name}.result r
 from ${stats_db_name}.result r
 join ${stats_db_name}.result_licenses as license on license.id = r.id
 WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
- on r.id= tmp.id;
+ on r.id= tmp.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_has_abstract purge;
+drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as
 select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract
-from ${stats_db_name}.publication;
+from ${stats_db_name}.publication; /*EOS*/
-drop table if exists ${stats_db_name}.indi_result_with_orcid purge;
+drop table if exists ${stats_db_name}.indi_result_with_orcid purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as
 select distinct r.id, coalesce(has_orcid, 0) as has_orcid
 from ${stats_db_name}.result r
 left outer join (select id, 1 as has_orcid from ${stats_db_name}.result_orcid) tmp
- on r.id= tmp.id;
+ on r.id= tmp.id; /*EOS*/
 ---- Sprint 3 ----
-drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge;
+drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as
 select distinct r.result as id, coalesce(fundref, 0) as fundref
 from ${stats_db_name}.project_results r
 left outer join (select distinct result, 1 as fundref from ${stats_db_name}.project_results where provenance='Harvested') tmp
- on r.result= tmp.result;
+ on r.result= tmp.result; /*EOS*/
@@ -105,65 +105,65 @@ from ${stats_db_name}.project_results r
 -- create table indi_result_org_collab stored as parquet as
 -- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations
 -- from ${stats_db_name}.result_organization as o1
 --
 -- compute stats indi_result_org_collab;
 --
-create TEMPORARY TABLE ${stats_db_name}.tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro
-join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null;
+create TEMPORARY VIEW tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro
+join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; /*EOS*/
-drop table if exists ${stats_db_name}.indi_result_org_collab purge;
+drop table if exists ${stats_db_name}.indi_result_org_collab purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as
 select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
-from ${stats_db_name}.tmp as o1
-join ${stats_db_name}.tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
-group by o1.organization, o2.organization, o1.name, o2.name;
+from tmp as o1
+join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
+group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/
-drop table if exists ${stats_db_name}.tmp purge;
+DROP VIEW if exists tmp; /*EOS*/
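The other systematic rewrite in this hunk replaces Hive scratch tables (create TEMPORARY TABLE ... / drop table ... purge) with session-scoped temporary views, which SparkSQL supports natively. A short sketch of the before/after pattern, reusing the spark session from the runner sketch above (relation names are illustrative):

    // Hive era: the scratch relation lived inside the stats database and
    // left files that had to be purged afterwards; SparkSQL rejects this
    // form in favour of temporary views.
    // spark.sql("create TEMPORARY TABLE stats.tmp AS SELECT ...")
    // spark.sql("drop table if exists stats.tmp purge")

    // SparkSQL era: a temporary view is session-local metadata, evaluated
    // lazily and dropped without touching storage.
    spark.sql("create TEMPORARY VIEW tmp AS SELECT ro.organization, ro.id FROM stats.result_organization ro")
    spark.sql("SELECT count(*) FROM tmp").show()
    spark.sql("DROP VIEW IF EXISTS tmp")

This is also why the ${stats_db_name} prefix disappears from every reference to these scratch relations: temporary views are not attached to a database.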
-create TEMPORARY TABLE ${stats_db_name}.tmp AS
+create TEMPORARY VIEW tmp AS
 select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro
-join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null;
+join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; /*EOS*/
-drop table if exists ${stats_db_name}.indi_result_org_country_collab purge;
+drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as
 select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
-from ${stats_db_name}.tmp as o1 join ${stats_db_name}.tmp as o2 on o1.id=o2.id
+from tmp as o1 join tmp as o2 on o1.id=o2.id
 where o1.id=o2.id and o1.country!=o2.country
-group by o1.organization, o1.id, o1.name, o2.country;
+group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/
-drop table if exists ${stats_db_name}.tmp purge;
+drop table if exists tmp purge; /*EOS*/
-create TEMPORARY TABLE ${stats_db_name}.tmp AS
+create TEMPORARY VIEW tmp AS
 select o.id organization, o.name, ro.project as project
 from ${stats_db_name}.organization o
- join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null;
+ join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; /*EOS*/
-drop table if exists ${stats_db_name}.indi_project_collab_org purge;
+drop table if exists ${stats_db_name}.indi_project_collab_org purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as
 select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
-from ${stats_db_name}.tmp as o1
- join ${stats_db_name}.tmp as o2 on o1.project=o2.project
+from tmp as o1
+ join tmp as o2 on o1.project=o2.project
 where o1.organization<>o2.organization and o1.name<>o2.name
-group by o1.name,o2.name, o1.organization, o2.organization;
+group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/
-drop table if exists ${stats_db_name}.tmp purge;
+DROP VIEW if exists tmp; /*EOS*/
-create TEMPORARY TABLE ${stats_db_name}.tmp AS
+create TEMPORARY VIEW tmp AS
 select o.id organization, o.name, o.country , ro.project as project
 from ${stats_db_name}.organization o
 join ${stats_db_name}.organization_projects ro on o.id=ro.id
- and o.country <> 'UNKNOWN' and o.name is not null;
+ and o.country <> 'UNKNOWN' and o.name is not null; /*EOS*/
-drop table if exists ${stats_db_name}.indi_project_collab_org_country purge;
+drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as
 select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
-from ${stats_db_name}.tmp as o1
- join ${stats_db_name}.tmp as o2 on o1.project=o2.project
+from tmp as o1
+ join tmp as o2 on o1.project=o2.project
 where o1.organization<>o2.organization and o1.country<>o2.country
-group by o1.organization, o2.country, o1.name;
+group by o1.organization, o2.country, o1.name; /*EOS*/
-drop table if exists ${stats_db_name}.tmp purge;
+DROP VIEW if exists tmp; /*EOS*/
-drop table if exists ${stats_db_name}.indi_funder_country_collab purge;
+drop table if exists ${stats_db_name}.indi_funder_country_collab purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as
 with tmp as (select funder, project, country from ${stats_db_name}.organization_projects op
@@ -174,26 +174,26 @@ select f1.funder, f1.country as country1, f2.country as country2, count(distinct
 from tmp as f1
 join tmp as f2 on f1.project=f2.project
 where f1.country<>f2.country
-group by f1.funder, f2.country, f1.country;
+group by f1.funder, f2.country, f1.country; /*EOS*/
-create TEMPORARY TABLE ${stats_db_name}.tmp AS
+create TEMPORARY VIEW tmp AS
 select distinct country, ro.id as result from ${stats_db_name}.organization o
 join ${stats_db_name}.result_organization ro on o.id=ro.organization
- where country <> 'UNKNOWN' and o.name is not null;
+ where country <> 'UNKNOWN' and o.name is not null; /*EOS*/
-drop table if exists ${stats_db_name}.indi_result_country_collab purge;
+drop table if exists ${stats_db_name}.indi_result_country_collab purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as
 select o1.country country1, o2.country country2, count(o1.result) as collaborations
-from ${stats_db_name}.tmp as o1
- join ${stats_db_name}.tmp as o2 on o1.result=o2.result
+from tmp as o1
+ join tmp as o2 on o1.result=o2.result
 where o1.country<>o2.country
-group by o1.country, o2.country;
+group by o1.country, o2.country; /*EOS*/
-drop table if exists ${stats_db_name}.tmp purge;
+DROP VIEW if exists tmp; /*EOS*/
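All of the collaboration indicators above share one shape: materialize the link relation once, self-join it on the shared key (result or project), drop the degenerate identical pair, and count per surviving pair. The equivalent DataFrame formulation, as a sketch (database and column names are illustrative):

    import org.apache.spark.sql.functions.{col, countDistinct}

    // Self-join result->organization links on the shared result id and
    // count distinct results per (org1, org2) pair.
    val ro = spark.table("stats.result_organization") // columns: id (result), organization
    val collaborations = ro.as("o1")
      .join(ro.as("o2"), col("o1.id") === col("o2.id"))
      .filter(col("o1.organization") =!= col("o2.organization"))
      .groupBy(col("o1.organization").as("org1"), col("o2.organization").as("org2"))
      .agg(countDistinct("o1.id").as("collaborations"))

As in the SQL, each unordered pair is counted in both directions, since (org1, org2) and (org2, org1) survive the inequality filter separately.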
 ---- Sprint 4 ----
-drop table if exists ${stats_db_name}.indi_pub_diamond purge;
+drop table if exists ${stats_db_name}.indi_pub_diamond purge; /*EOS*/
 --create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
 --select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
@@ -212,9 +212,9 @@ left outer join (select pd.id, 1 as in_diamond_journal from ${stats_db_name}.pub
 join ${stats_db_name}.datasource d on d.id=pd.datasource
 join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
 and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
-on pd.id=tmp.id;
+on pd.id=tmp.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_in_transformative purge;
+drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as
 select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
@@ -224,9 +224,9 @@ from ${stats_db_name}.publication pd
 join ${stats_db_name}.datasource d on d.id=pd.datasource
 join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
 and ps.is_transformative_journal=true) tmp
- on pd.id=tmp.id;
+ on pd.id=tmp.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge;
+drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as
 select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from ${stats_db_name}.result_instance ri
@@ -236,53 +236,53 @@ select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_op
 join ${stats_db_name}.datasource d on ri.hostedby=d.id
 where d.type like '%Journal%' and ri.accessright='Closed Access' and
 (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
- on tmp.id=ri.id;
+ on tmp.id=ri.id; /*EOS*/
 ---- Sprint 5 ----
-drop table if exists ${stats_db_name}.indi_result_no_of_copies purge;
+drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as
-select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id;
+select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; /*EOS*/
 ---- Sprint 6 ----
-drop table if exists ${stats_db_name}.indi_pub_downloads purge;
+drop table if exists ${stats_db_name}.indi_pub_downloads purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads stored as parquet as
 SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
 join ${stats_db_name}.publication on result_id=id
 where downloads>0 GROUP BY result_id
-order by no_downloads desc;
+order by no_downloads desc; /*EOS*/
 --ANALYZE TABLE ${stats_db_name}.indi_pub_downloads COMPUTE STATISTICS;
-drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge;
+drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as
 SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
 join ${stats_db_name}.publication on result_id=id
 where downloads>0 GROUP BY result_id, repository_id
-order by result_id;
+order by result_id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_downloads_year purge;
+drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as
 SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads
 from openaire_prod_usage_stats.usage_stats us
 join ${stats_db_name}.publication on result_id=id where downloads>0
-GROUP BY result_id, substring(us.`date`, 1,4);
+GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge;
+drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as
 SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads
 from openaire_prod_usage_stats.usage_stats us
 join ${stats_db_name}.publication on result_id=id
 where downloads>0
-GROUP BY result_id, repository_id, substring(us.`date`, 1,4);
+GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/
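The Sprint 6 indicators are plain usage-stats aggregations; the per-year variants bucket on the first four characters of the usage date. The same aggregation as a DataFrame sketch (the usage-stats database name follows the script; the rest is illustrative):

    import org.apache.spark.sql.functions.{col, substring, sum}

    val usage = spark.table("openaire_prod_usage_stats.usage_stats")
    val pubs  = spark.table("stats.publication")

    // yearly download counts per result, mirroring indi_pub_downloads_year
    val downloadsPerYear = usage
      .join(pubs, usage("result_id") === pubs("id"))
      .filter(col("downloads") > 0)
      .groupBy(col("result_id"), substring(col("date"), 1, 4).cast("int").as("year"))
      .agg(sum("downloads").as("no_downloads"))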
 ---- Sprint 7 ----
-drop table if exists ${stats_db_name}.indi_pub_gold_oa purge;
+drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; /*EOS*/
 --create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
 -- WITH gold_oa AS ( SELECT
@@ -381,9 +381,9 @@ left outer join (
 select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
 join dd on dd.id=pd.datasource
- left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id;
+ left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge;
+drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as
 WITH hybrid_oa AS (
@@ -414,9 +414,9 @@ FROM ${stats_db_name}.publication_datasources pd
 JOIN hybrid_oa ON issn.issn = hybrid_oa.issn
 JOIN ${stats_db_name}.indi_result_has_cc_licence cc on pd.id=cc.id
 JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id
- where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id;
+ where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_pub_hybrid purge;
+drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/
 --create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
 -- WITH gold_oa AS ( SELECT
@@ -489,9 +489,9 @@ join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
 join ${stats_db_name}.datasource d on d.id=ri.hostedby
 where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid'))tmp
-on pd.id=tmp.id;
+on pd.id=tmp.id; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_fairness purge;
+drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as
 --return results with PIDs, and rich metadata group by organization
@@ -509,9 +509,9 @@ create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet
 --return results_fair/all_results
 select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
 from allresults
- join result_fair on result_fair.organization=allresults.organization;
+ join result_fair on result_fair.organization=allresults.organization; /*EOS*/
-CREATE TEMPORARY table ${stats_db_name}.result_fair as
+CREATE TEMPORARY VIEW result_fair as
 select ro.organization organization, count(distinct ro.id) no_result_fair
 from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.publication p on p.id=ro.id
@@ -519,296 +519,296 @@ select ro.organization organization, count(distinct ro.id) no_result_fair
 join ${stats_db_name}.indi_pub_grey_lit gl on gl.id=p.id
 where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 and dc.doi_from_crossref=1 and gl.grey_lit=0
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allresults as
+CREATE TEMPORARY VIEW allresults as
 select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.publication p on p.id=ro.id
 where cast(year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_fairness_pub_pr purge;
+drop table if exists ${stats_db_name}.indi_org_fairness_pub_pr purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_fairness_pub_pr stored as parquet as
 select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
-from ${stats_db_name}.allresults ar
- join ${stats_db_name}.result_fair rf on rf.organization=ar.organization;
+from allresults ar
+ join result_fair rf on rf.organization=ar.organization; /*EOS*/
-DROP table ${stats_db_name}.result_fair purge;
-DROP table ${stats_db_name}.allresults purge;
+DROP VIEW result_fair; /*EOS*/
+DROP VIEW allresults; /*EOS*/
-CREATE TEMPORARY table ${stats_db_name}.result_fair as
+CREATE TEMPORARY VIEW result_fair as
 select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result p on p.id=ro.id
 where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003
- group by ro.organization, year;
+ group by ro.organization, year; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allresults as select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
+CREATE TEMPORARY VIEW allresults as select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result p on p.id=ro.id
 where cast(year as int)>2003
- group by ro.organization, year;
+ group by ro.organization, year; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_fairness_pub_year purge;
+drop table if exists ${stats_db_name}.indi_org_fairness_pub_year purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_fairness_pub_year stored as parquet as
 select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
-from ${stats_db_name}.allresults
- join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year;
+from allresults
+ join result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; /*EOS*/
-DROP table ${stats_db_name}.result_fair purge;
-DROP table ${stats_db_name}.allresults purge;
+DROP VIEW result_fair; /*EOS*/
+DROP VIEW allresults; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.result_fair as
+CREATE TEMPORARY VIEW result_fair as
 select ro.organization organization, count(distinct ro.id) no_result_fair
 from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result p on p.id=ro.id
 where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allresults as
+CREATE TEMPORARY VIEW allresults as
 select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result p on p.id=ro.id
 where cast(year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_fairness_pub purge;
+drop table if exists ${stats_db_name}.indi_org_fairness_pub purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_fairness_pub as
 select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
-from ${stats_db_name}.allresults ar join ${stats_db_name}.result_fair rf
-on rf.organization=ar.organization;
+from allresults ar join result_fair rf
+on rf.organization=ar.organization; /*EOS*/
-DROP table ${stats_db_name}.result_fair purge;
-DROP table ${stats_db_name}.allresults purge;
+DROP VIEW result_fair; /*EOS*/
+DROP VIEW allresults; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.result_fair as
+CREATE TEMPORARY VIEW result_fair as
 select year, ro.organization organization, count(distinct ro.id) no_result_fair
 from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result r on r.id=ro.id
 join ${stats_db_name}.result_pids rp on r.id=rp.id
 where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003
- group by ro.organization, year;
+ group by ro.organization, year; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allresults as
+CREATE TEMPORARY VIEW allresults as
 select year, ro.organization, count(distinct ro.id) no_allresults
 from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result r on r.id=ro.id
 where cast(year as int)>2003
- group by ro.organization, year;
+ group by ro.organization, year; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_fairness_year purge;
+drop table if exists ${stats_db_name}.indi_org_fairness_year purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_fairness_year stored as parquet as
 select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness
- from ${stats_db_name}.allresults
- join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and cast(result_fair.year as int)=cast(allresults.year as int);
+ from allresults
+ join result_fair on result_fair.organization=allresults.organization and cast(result_fair.year as int)=cast(allresults.year as int); /*EOS*/
-DROP table ${stats_db_name}.result_fair purge;
-DROP table ${stats_db_name}.allresults purge;
+DROP VIEW result_fair; /*EOS*/
+DROP VIEW allresults; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as
+CREATE TEMPORARY VIEW result_with_pid as
 select year, ro.organization, count(distinct rp.id) no_result_with_pid
 from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result_pids rp on rp.id=ro.id
 join ${stats_db_name}.result r on r.id=rp.id
 where cast(year as int) >2003
- group by ro.organization, year;
+ group by ro.organization, year; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allresults as
+CREATE TEMPORARY VIEW allresults as
 select year, ro.organization, count(distinct ro.id) no_allresults
 from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result r on r.id=ro.id
 where cast(year as int) >2003
- group by ro.organization, year;
+ group by ro.organization, year; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_findable_year purge;
+drop table if exists ${stats_db_name}.indi_org_findable_year purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_findable_year stored as parquet as
 select cast(allresults.year as int) year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
-from ${stats_db_name}.allresults
- join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization and cast(result_with_pid.year as int)=cast(allresults.year as int);
+from allresults
+ join result_with_pid on result_with_pid.organization=allresults.organization and cast(result_with_pid.year as int)=cast(allresults.year as int); /*EOS*/
-DROP table ${stats_db_name}.result_with_pid purge;
-DROP table ${stats_db_name}.allresults purge;
+DROP VIEW result_with_pid; /*EOS*/
+DROP VIEW allresults; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as
+CREATE TEMPORARY VIEW result_with_pid as
 select ro.organization, count(distinct rp.id) no_result_with_pid
 from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result_pids rp on rp.id=ro.id
 join ${stats_db_name}.result r on r.id=rp.id
 where cast(year as int) >2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allresults as
+CREATE TEMPORARY VIEW allresults as
 select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro
 join ${stats_db_name}.result r on r.id=ro.id
 where cast(year as int) >2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_findable purge;
+drop table if exists ${stats_db_name}.indi_org_findable purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_findable stored as parquet as
 select allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable
-from ${stats_db_name}.allresults
- join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization;
+from allresults
+ join result_with_pid on result_with_pid.organization=allresults.organization; /*EOS*/
-DROP table ${stats_db_name}.result_with_pid purge;
-DROP table ${stats_db_name}.allresults purge;
+DROP VIEW result_with_pid; /*EOS*/
+DROP VIEW allresults; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as
+CREATE TEMPORARY VIEW pubs_oa as
 SELECT ro.organization, count(distinct r.id) no_oapubs FROM ${stats_db_name}.publication r
 join ${stats_db_name}.result_organization ro on ro.id=r.id
 join ${stats_db_name}.result_instance ri on ri.id=r.id
 where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as
+CREATE TEMPORARY VIEW datasets_oa as
 SELECT ro.organization, count(distinct r.id) no_oadatasets FROM ${stats_db_name}.dataset r
 join ${stats_db_name}.result_organization ro on ro.id=r.id
 join ${stats_db_name}.result_instance ri on ri.id=r.id
 where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as
+CREATE TEMPORARY VIEW software_oa as
 SELECT ro.organization, count(distinct r.id) no_oasoftware FROM ${stats_db_name}.software r
 join ${stats_db_name}.result_organization ro on ro.id=r.id
 join ${stats_db_name}.result_instance ri on ri.id=r.id
 where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as
+CREATE TEMPORARY VIEW allpubs as
 SELECT ro.organization, count(ro.id) no_allpubs FROM ${stats_db_name}.result_organization ro
 join ${stats_db_name}.publication ps on ps.id=ro.id
 where cast(ps.year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as
+CREATE TEMPORARY VIEW alldatasets as
 SELECT ro.organization, count(ro.id) no_alldatasets FROM ${stats_db_name}.result_organization ro
 join ${stats_db_name}.dataset ps on ps.id=ro.id
 where cast(ps.year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as
+CREATE TEMPORARY VIEW allsoftware as
 SELECT ro.organization, count(ro.id) no_allsoftware FROM ${stats_db_name}.result_organization ro
 join ${stats_db_name}.software ps on ps.id=ro.id
 where cast(ps.year as int)>2003
- group by ro.organization;
+ group by ro.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as
-select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs
- join ${stats_db_name}.pubs_oa on allpubs.organization=pubs_oa.organization;
+CREATE TEMPORARY VIEW allpubsshare as
+select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs
+ join pubs_oa on allpubs.organization=pubs_oa.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as
+CREATE TEMPORARY VIEW alldatasetssshare as
 select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d
- from ${stats_db_name}.alldatasets
- join ${stats_db_name}.datasets_oa on alldatasets.organization=datasets_oa.organization;
+ from alldatasets
+ join datasets_oa on alldatasets.organization=datasets_oa.organization; /*EOS*/
-CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as
+CREATE TEMPORARY VIEW allsoftwaresshare as
 select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s
- from ${stats_db_name}.allsoftware
- join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization;
+ from allsoftware
+ join software_oa on allsoftware.organization=software_oa.organization; /*EOS*/
-drop table if exists ${stats_db_name}.indi_org_openess purge;
+drop table if exists ${stats_db_name}.indi_org_openess purge; /*EOS*/
 create table if not exists ${stats_db_name}.indi_org_openess stored as parquet as
 select allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end))
- org_openess FROM ${stats_db_name}.allpubsshare
+ org_openess FROM allpubsshare
 left outer join (select organization,d from
- ${stats_db_name}.alldatasetssshare) tmp1
+ alldatasetssshare) tmp1
 on tmp1.organization=allpubsshare.organization
 left outer join (select organization,s from
- ${stats_db_name}.allsoftwaresshare) tmp2
- on tmp2.organization=allpubsshare.organization;
+ allsoftwaresshare) tmp2
+ on tmp2.organization=allpubsshare.organization; /*EOS*/
-DROP TABLE ${stats_db_name}.pubs_oa purge;
-DROP TABLE ${stats_db_name}.datasets_oa purge;
-DROP TABLE ${stats_db_name}.software_oa purge;
-DROP TABLE ${stats_db_name}.allpubs purge;
-DROP TABLE ${stats_db_name}.alldatasets purge;
-DROP TABLE ${stats_db_name}.allsoftware purge;
-DROP TABLE ${stats_db_name}.allpubsshare purge;
-DROP TABLE ${stats_db_name}.alldatasetssshare purge;
-DROP TABLE ${stats_db_name}.allsoftwaresshare purge;
+DROP VIEW pubs_oa; /*EOS*/
+DROP VIEW datasets_oa; /*EOS*/
+DROP VIEW software_oa; /*EOS*/
+DROP VIEW allpubs; /*EOS*/
+DROP VIEW alldatasets; /*EOS*/
+DROP VIEW allsoftware; /*EOS*/
+DROP VIEW allpubsshare; /*EOS*/
+DROP VIEW alldatasetssshare; /*EOS*/
+DROP VIEW allsoftwaresshare; /*EOS*/
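The indi_org_openess expression is a null-aware average: the publication share always contributes, while the dataset and software shares produced by the two left joins contribute to the numerator and the denominator only when present. The same rule in plain Scala, as a sketch (names are illustrative, not the script's):

    // p = OA share of publications (always present),
    // d, s = OA shares of datasets and software (None when the left join found nothing).
    def orgOpenness(p: Double, d: Option[Double], s: Option[Double]): Double = {
      val numerator   = p + s.getOrElse(0.0) + d.getOrElse(0.0)
      val denominator = 1 + (if (s.isDefined) 1 else 0) + (if (d.isDefined) 1 else 0)
      numerator / denominator
    }

    // e.g. orgOpenness(0.6, Some(0.4), None) == 0.5, not (0.6 + 0.4 + 0.0) / 3

Averaging only over the available shares keeps an organization with no datasets or software from being penalized for result types it does not produce.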
${stats_db_name}.pubs_oa on allpubs.organization=pubs_oa.organization; +CREATE TEMPORARY VIEW allpubsshare as +select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.organization=pubs_oa.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join ${stats_db_name}.datasets_oa on alldatasets.organization=datasets_oa.organization; + from alldatasets + join datasets_oa on alldatasets.organization=datasets_oa.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization; + from allsoftware + join software_oa on allsoftware.organization=software_oa.organization; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_openess purge; +drop table if exists ${stats_db_name}.indi_org_openess purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_openess stored as parquet as select allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - org_openess FROM ${stats_db_name}.allpubsshare + org_openess FROM allpubsshare left outer join (select organization,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.organization=allpubsshare.organization left outer join (select organization,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.organization=allpubsshare.organization; + allsoftwaresshare) tmp2 + on tmp2.organization=allpubsshare.organization; /*EOS*/ -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW pubs_oa; /*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW allsoftwaresshare; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa AS +CREATE TEMPORARY VIEW pubs_oa AS SELECT r.year, ro.organization, count(distinct r.id) no_oapubs FROM ${stats_db_name}.publication r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization,r.year; + group by ro.organization,r.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa AS +CREATE TEMPORARY VIEW datasets_oa AS SELECT r.year,ro.organization, count(distinct r.id) no_oadatasets FROM ${stats_db_name}.dataset r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or 
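The indi_org_openess expression above is the mean of the open-access shares over the product types that actually exist for an organization: the publication share p always contributes, while the dataset (d) and software (s) shares enter both numerator and denominator only when the left outer joins produced a row. Reduced to plain Java (names illustrative):

    // mean of the available shares: p always present, d and s only when the
    // outer joins found datasets/software for the organization (null otherwise)
    static double openness(double p, Double d, Double s) {
        double sum = p + (d == null ? 0 : d) + (s == null ? 0 : s);
        int available = 1 + (d == null ? 0 : 1) + (s == null ? 0 : 1);
        return sum / available;
    }

For example openness(0.6, null, 0.2) yields 0.4: a missing dataset share neither drags the score down nor inflates the denominator. The same formula is reused below for the per-year variant and, later, for the funder- and RI-level openness indicators.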
ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization, r.year; + group by ro.organization, r.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.software_oa AS +CREATE TEMPORARY VIEW software_oa AS SELECT r.year,ro.organization, count(distinct r.id) no_oasoftware FROM ${stats_db_name}.software r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization, r.year; + group by ro.organization, r.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +CREATE TEMPORARY VIEW allpubs as SELECT p.year,ro.organization organization, count(ro.id) no_allpubs FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.publication p on p.id=ro.id where cast(p.year as int)>2003 - group by ro.organization, p.year; + group by ro.organization, p.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +CREATE TEMPORARY VIEW alldatasets as SELECT d.year, ro.organization organization, count(ro.id) no_alldatasets FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.dataset d on d.id=ro.id where cast(d.year as int)>2003 - group by ro.organization, d.year; + group by ro.organization, d.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +CREATE TEMPORARY VIEW allsoftware as SELECT s.year,ro.organization organization, count(ro.id) no_allsoftware FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.software s on s.id=ro.id where cast(s.year as int)>2003 - group by ro.organization, s.year; + group by ro.organization, s.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as -select allpubs.year, pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs - join ${stats_db_name}.pubs_oa on allpubs.organization=pubs_oa.organization where cast(allpubs.year as INT)=cast(pubs_oa.year as int); +CREATE TEMPORARY VIEW allpubsshare as +select allpubs.year, pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.organization=pubs_oa.organization where cast(allpubs.year as INT)=cast(pubs_oa.year as int); /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select alldatasets.year, datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join ${stats_db_name}.datasets_oa on alldatasets.organization=datasets_oa.organization where cast(alldatasets.year as INT)=cast(datasets_oa.year as int); + from alldatasets + join datasets_oa on alldatasets.organization=datasets_oa.organization where cast(alldatasets.year as INT)=cast(datasets_oa.year as int); /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select allsoftware.year, software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization where cast(allsoftware.year as INT)=cast(software_oa.year as int); + from allsoftware + join software_oa on allsoftware.organization=software_oa.organization where cast(allsoftware.year as INT)=cast(software_oa.year as int); /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_openess_year purge; +drop table if 
exists ${stats_db_name}.indi_org_openess_year purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_openess_year stored as parquet as select cast(allpubsshare.year as int) year, allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - org_openess FROM ${stats_db_name}.allpubsshare + org_openess FROM allpubsshare left outer join (select cast(year as int), organization,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.organization=allpubsshare.organization and tmp1.year=allpubsshare.year left outer join (select cast(year as int), organization,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.organization=allpubsshare.organization and cast(tmp2.year as int)=cast(allpubsshare.year as int); + allsoftwaresshare) tmp2 + on tmp2.organization=allpubsshare.organization and cast(tmp2.year as int)=cast(allpubsshare.year as int); /*EOS*/ -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW pubs_oa; /*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW allsoftwaresshare; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_has_preprint purge; +drop table if exists ${stats_db_name}.indi_pub_has_preprint purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_has_preprint stored as parquet as select distinct p.id, coalesce(has_preprint, 0) as has_preprint @@ -817,8 +817,8 @@ from ${stats_db_name}.publication_classifications p select p.id, 1 as has_preprint from ${stats_db_name}.publication_classifications p where p.type='Preprint') tmp - on p.id= tmp.id; -drop table if exists ${stats_db_name}.indi_pub_in_subscribed purge; + on p.id= tmp.id; /*EOS*/ +drop table if exists ${stats_db_name}.indi_pub_in_subscribed purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_in_subscribed stored as parquet as select distinct p.id, coalesce(is_subscription, 0) as is_subscription @@ -829,9 +829,9 @@ from ${stats_db_name}.publication p join ${stats_db_name}.indi_pub_hybrid h on p.id=h.id join ${stats_db_name}.indi_pub_in_transformative t on p.id=t.id where g.is_gold=0 and h.is_hybrid=0 and t.is_transformative=0) tmp - on p.id=tmp.id; + on p.id=tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_result_with_pid purge; +drop table if exists ${stats_db_name}.indi_result_with_pid purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_with_pid as select distinct p.id, coalesce(result_with_pid, 0) as result_with_pid @@ -839,25 +839,25 @@ from ${stats_db_name}.result p left outer join ( select p.id, 1 as result_with_pid from ${stats_db_name}.result_pids p) tmp - on p.id= tmp.id; + on p.id= tmp.id; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.pub_fos_totals as +CREATE TEMPORARY VIEW pub_fos_totals as select rf.id, count(distinct lvl3) totals from ${stats_db_name}.result_fos rf -group by rf.id; +group by rf.id; /*EOS*/ -drop table if exists 
${stats_db_name}.indi_pub_interdisciplinarity purge; +drop table if exists ${stats_db_name}.indi_pub_interdisciplinarity purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_interdisciplinarity as select distinct p.id as id, coalesce(is_interdisciplinary, 0) as is_interdisciplinary -from ${stats_db_name}.pub_fos_totals p +from pub_fos_totals p left outer join ( -select pub_fos_totals.id, 1 as is_interdisciplinary from ${stats_db_name}.pub_fos_totals -where totals>1) tmp on p.id=tmp.id; +select pub_fos_totals.id, 1 as is_interdisciplinary from pub_fos_totals +where totals>1) tmp on p.id=tmp.id; /*EOS*/ -drop table ${stats_db_name}.pub_fos_totals purge; +drop view pub_fos_totals; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; +drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; /*EOS*/ --create table if not exists ${stats_db_name}.indi_pub_bronze_oa stored as parquet as --select distinct p.id, coalesce(is_bronze_oa,0) as is_bronze_oa @@ -883,38 +883,38 @@ join ${stats_db_name}.datasource d on d.id=ri.hostedby where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is null) or ra.accessroute='bronze')) tmp -on pd.id=tmp.id; +on pd.id=tmp.id; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as +CREATE TEMPORARY VIEW project_year_result_year as select p.id project_id, acronym, r.id result_id, r.year, p.end_year from ${stats_db_name}.project p join ${stats_db_name}.result_projects rp on p.id=rp.project join ${stats_db_name}.result r on r.id=rp.id -where p.end_year is NOT NULL and r.year is not null; +where p.end_year is NOT NULL and r.year is not null; /*EOS*/ -drop table if exists ${stats_db_name}.indi_is_project_result_after purge; +drop table if exists ${stats_db_name}.indi_is_project_result_after purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_is_project_result_after stored as parquet as select pry.project_id, pry.acronym, pry.result_id, coalesce(is_project_result_after, 0) as is_project_result_after -from ${stats_db_name}.project_year_result_year pry +from project_year_result_year pry left outer join (select pry.project_id, pry.acronym, pry.result_id, 1 as is_project_result_after -from ${stats_db_name}.project_year_result_year pry -where pry.year>pry.end_year) tmp on pry.result_id=tmp.result_id; +from project_year_result_year pry +where pry.year>pry.end_year) tmp on pry.result_id=tmp.result_id; /*EOS*/ -drop table ${stats_db_name}.project_year_result_year purge; +drop view project_year_result_year; /*EOS*/ -drop table ${stats_db_name}.indi_is_funder_plan_s purge; +drop table if exists ${stats_db_name}.indi_is_funder_plan_s purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_is_funder_plan_s stored as parquet as select distinct f.id, f.name, coalesce(is_funder_plan_s, 0) as is_funder_plan_s from ${stats_db_name}.funder f left outer join (select id, name, 1 as is_funder_plan_s from ${stats_db_name}.funder join stats_ext.plan_s_short on c_o_alition_s_organisation_funder=name) tmp - on f.name= tmp.name; + on f.name= tmp.name; /*EOS*/ --Funder Fairness -drop table ${stats_db_name}.indi_funder_fairness purge; +drop table if exists ${stats_db_name}.indi_funder_fairness purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funder_fairness stored as parquet as with result_fair as @@ -930,10 +930,10 @@ create table if not exists ${stats_db_name}.indi_funder_fairness stored as 
parqu group by p.funder) select allresults.funder, result_fair.no_result_fair/allresults.no_allresults funder_fairness from allresults - join result_fair on result_fair.funder=allresults.funder; + join result_fair on result_fair.funder=allresults.funder; /*EOS*/ --RIs Fairness -drop table ${stats_db_name}.indi_ris_fairness purge; +drop table if exists ${stats_db_name}.indi_ris_fairness purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_ris_fairness stored as parquet as with result_contexts as @@ -953,188 +953,188 @@ allresults as group by rc.ri_initiative) select allresults.ri_initiative, result_fair.no_result_fair/allresults.no_allresults ris_fairness from allresults - join result_fair on result_fair.ri_initiative=allresults.ri_initiative; + join result_fair on result_fair.ri_initiative=allresults.ri_initiative; /*EOS*/ --Funder Openess -CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as +CREATE TEMPORARY VIEW pubs_oa as select p.funder funder, count(distinct rp.id) no_oapubs from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.publication r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as +CREATE TEMPORARY VIEW datasets_oa as select p.funder funder, count(distinct rp.id) no_oadatasets from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.dataset r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as +CREATE TEMPORARY VIEW software_oa as select p.funder funder, count(distinct rp.id) no_oasoftware from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.software r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +CREATE TEMPORARY VIEW allpubs as select p.funder funder, count(distinct rp.id) no_allpubs from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.publication r on r.id=rp.id where cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +CREATE TEMPORARY VIEW alldatasets as select p.funder funder, count(distinct rp.id) no_alldatasets from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.dataset r on r.id=rp.id where cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +CREATE TEMPORARY VIEW allsoftware as select p.funder funder, count(distinct rp.id) no_allsoftware from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.software r on r.id=rp.id where cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE 
TEMPORARY TABLE ${stats_db_name}.allpubsshare as -select pubs_oa.funder, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs - join ${stats_db_name}.pubs_oa on allpubs.funder=pubs_oa.funder; +CREATE TEMPORARY VIEW allpubsshare as +select pubs_oa.funder, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.funder=pubs_oa.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select datasets_oa.funder, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join ${stats_db_name}.datasets_oa on alldatasets.funder=datasets_oa.funder; + from alldatasets + join datasets_oa on alldatasets.funder=datasets_oa.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select software_oa.funder, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join ${stats_db_name}.software_oa on allsoftware.funder=software_oa.funder; + from allsoftware + join software_oa on allsoftware.funder=software_oa.funder; /*EOS*/ -drop table ${stats_db_name}.indi_funder_openess purge; +drop table if exists ${stats_db_name}.indi_funder_openess purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funder_openess stored as parquet as select allpubsshare.funder, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - funder_openess FROM ${stats_db_name}.allpubsshare + funder_openess FROM allpubsshare left outer join (select funder,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.funder=allpubsshare.funder left outer join (select funder,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.funder=allpubsshare.funder; + allsoftwaresshare) tmp2 + on tmp2.funder=allpubsshare.funder; /*EOS*/ -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW pubs_oa; /*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW allsoftwaresshare; /*EOS*/ --RIs Openess -CREATE TEMPORARY TABLE ${stats_db_name}.result_contexts as +CREATE TEMPORARY VIEW result_contexts as select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc join ${stats_db_name}.concept on concept.id=rc.concept join ${stats_db_name}.category on category.id=concept.category -join ${stats_db_name}.context on context.id=category.context; +join ${stats_db_name}.context on context.id=category.context; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oapubs from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW pubs_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oapubs from result_contexts rp join ${stats_db_name}.publication r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where 
(ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oadatasets from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW datasets_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oadatasets from result_contexts rp join ${stats_db_name}.dataset r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oasoftware from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW software_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oasoftware from result_contexts rp join ${stats_db_name}.software r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allpubs from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW allpubs as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allpubs from result_contexts rp join ${stats_db_name}.publication r on r.id=rp.id where cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_alldatasets from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW alldatasets as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_alldatasets from result_contexts rp join ${stats_db_name}.dataset r on r.id=rp.id where cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allsoftware from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW allsoftware as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allsoftware from result_contexts rp join ${stats_db_name}.software r on r.id=rp.id where cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as -select pubs_oa.ri_initiative, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs - join ${stats_db_name}.pubs_oa on allpubs.ri_initiative=pubs_oa.ri_initiative; +CREATE TEMPORARY VIEW allpubsshare as +select pubs_oa.ri_initiative, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.ri_initiative=pubs_oa.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select datasets_oa.ri_initiative, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join ${stats_db_name}.datasets_oa on alldatasets.ri_initiative=datasets_oa.ri_initiative; + from alldatasets + join datasets_oa on 
alldatasets.ri_initiative=datasets_oa.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select software_oa.ri_initiative, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join ${stats_db_name}.software_oa on allsoftware.ri_initiative=software_oa.ri_initiative; + from allsoftware + join software_oa on allsoftware.ri_initiative=software_oa.ri_initiative; /*EOS*/ -drop table ${stats_db_name}.indi_ris_openess purge; +drop table if exists ${stats_db_name}.indi_ris_openess purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_ris_openess stored as parquet as select allpubsshare.ri_initiative, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - ris_openess FROM ${stats_db_name}.allpubsshare + ris_openess FROM allpubsshare left outer join (select ri_initiative,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.ri_initiative=allpubsshare.ri_initiative left outer join (select ri_initiative,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.ri_initiative=allpubsshare.ri_initiative; + allsoftwaresshare) tmp2 + on tmp2.ri_initiative=allpubsshare.ri_initiative; /*EOS*/ -DROP TABLE ${stats_db_name}.result_contexts purge; -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW result_contexts; /*EOS*/ +DROP VIEW pubs_oa; /*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW allsoftwaresshare; /*EOS*/ --Funder Findability -drop table ${stats_db_name}.indi_funder_findable purge; +drop table if exists ${stats_db_name}.indi_funder_findable purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funder_findable stored as parquet as with result_findable as @@ -1151,10 +1151,10 @@ with result_findable as group by p.funder) select allresults.funder, result_findable.no_result_findable/allresults.no_allresults funder_findable from allresults - join result_findable on result_findable.funder=allresults.funder; + join result_findable on result_findable.funder=allresults.funder; /*EOS*/ --RIs Findability -drop table ${stats_db_name}.indi_ris_findable purge; +drop table if exists ${stats_db_name}.indi_ris_findable purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_ris_findable stored as parquet as with result_contexts as @@ -1175,7 +1175,7 @@ allresults as group by rc.ri_initiative) select allresults.ri_initiative, result_findable.no_result_findable/allresults.no_allresults ris_findable from allresults - join result_findable on result_findable.ri_initiative=allresults.ri_initiative; + join result_findable on result_findable.ri_initiative=allresults.ri_initiative; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_publicly_funded stored as parquet as with org_names_pids as @@ -1195,6 +1195,7 @@ and pf.publicly_funded='yes') foo) select distinct p.id, 
coalesce(publicly_funded, 0) as publicly_funded from ${stats_db_name}.publication p left outer join ( -select distinct ro.id, 1 as publicly_funded from result_organization ro +select distinct ro.id, 1 as publicly_funded from ${stats_db_name}.result_organization ro join ${stats_db_name}.organization o on o.id=ro.organization -join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; \ No newline at end of file +join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; /*EOS*/ + diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index f15f223209..709de65959 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -64,6 +64,26 @@ hadoop_user_name user name of the wf owner + + + sparkSqlWarehouseDir + + + + sparkClusterOpts + --conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory + spark cluster-wide options + + + sparkResourceOpts + --executor-memory=6G --conf spark.executor.memoryOverhead=4G --executor-cores=6 --driver-memory=8G --driver-cores=4 + spark resource options + + + sparkApplicationOpts + --conf spark.sql.shuffle.partitions=3840 + spark resource options + @@ -75,17 +95,21 @@ ${hive_metastore_uris} - hive.txn.timeout - ${hive_timeout} + hive.txn.timeout + ${hive_timeout} hive.mapjoin.followby.gby.localtask.max.memory.usage 0.80 - - mapred.job.queue.name - analytics - + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + mapred.job.queue.name + analytics + @@ -133,164 +157,164 @@ ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - 
stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - external_stats_db_name=${external_stats_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + external_stats_db_name=${external_stats_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - external_stats_db_name=${external_stats_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + external_stats_db_name=${external_stats_db_name} - - + + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} @@ -322,12 +346,23 @@ - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - external_stats_db_name=${external_stats_db_name} - + + yarn + cluster + Step16-createIndicatorsTables + eu.dnetlib.dhp.oozie.RunSQLSparkJob + dhp-stats-update-${projectVersion}.jar + + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + ${sparkClusterOpts} + ${sparkResourceOpts} + ${sparkApplicationOpts} + + --hiveMetastoreUris${hive_metastore_uris} + --sqleu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql + --stats_db_name${stats_db_name} + --external_stats_db_name${external_stats_db_name} + @@ -387,18 +422,18 @@ - - - - - - - - - - - - + + + + + + + + + + + + @@ -443,8 +478,8 @@ ${jobTracker} ${nameNode} copyDataToImpalaCluster.sh - - + + ${stats_db_name} ${monitor_db_name} ${observatory_db_name} @@ -505,4 +540,4 @@ - + \ No newline at end of file From a5995ab557721770f6bebeb41e65270e8597a11f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 29 Jan 2024 18:19:48 +0100 Subject: [PATCH 49/56] [orcid-enrichment] change the value of parameters. 
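Before this change the working-directory layout of the orcid enrichment was inconsistent: the four preparation jobs wrote to ${workingDir}/orcid/preparedInfo/targetOrcidAssoc while the merge step read from ${workingDir}/orcid/orcidprop, so the merged association list was not built from the freshly prepared data. The parameter changes below align all five jobs on one layout, sketched here with illustrative constants:

    // assumed layout after this change (constants mirror the workflow parameters)
    String orcidDir = workingDir + "/orcid";
    String prepared = orcidDir + "/targetOrcidAssoc"; // written by the 4 Prepare* jobs, read by the merge step
    String merged   = orcidDir + "/mergedOrcidAssoc"; // written by the merge step, read by the 4 propagation jobs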
--- .../oozie_app/workflow.xml | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index bab1e55dfa..a9642d6379 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -114,7 +114,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -142,7 +142,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -170,7 +170,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -198,7 +198,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/orcid/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -225,8 +225,8 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath${workingDir}/orcid/orcidprop - --outputPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --sourcePath${workingDir}/orcid/targetOrcidAssoc + --outputPath${workingDir}/orcid/mergedOrcidAssoc @@ -247,9 +247,10 @@ eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=4 + --executor-memory=4G --driver-memory=${sparkDriverMemory} + --conf spark.executor.memoryOverhead=5G --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} @@ -259,9 +260,9 @@ --conf spark.speculation=false --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=15000 - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication @@ -291,7 +292,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/dataset 
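The publication propagation above is the heaviest of the four result types, so it stops inheriting the generic ${sparkExecutorCores}/${sparkExecutorMemory} settings: executors are pinned to 4 cores and 4G with a 5G memoryOverhead, and spark.sql.shuffle.partitions grows from 3840 to 15000. A back-of-envelope reading of those numbers, assuming the usual YARN container sizing rule:

    // per-executor YARN ask ~= executor memory + overhead (standard rule, assumed here)
    int containerGb = 4 /* executor-memory */ + 5 /* memoryOverhead */; // ~9G per executor
    // ~3.9x more shuffle partitions => each shuffle block shrinks accordingly,
    // trading scheduling overhead for lower per-task memory pressure
    double ratio = 15000.0 / 3840.0;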
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset @@ -321,7 +322,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${outputPath}/otherresearchproduct @@ -351,7 +352,7 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/orcid/orcidprop/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software From f2a08d8cc28f078eb0ce7216f4bfa78db0063beb Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 30 Jan 2024 19:20:14 +0100 Subject: [PATCH 50/56] test for Italian records from IRS repositories --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 15 ++ .../eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml | 215 ++++++++++++++++++ .../oa/provision/XmlRecordFactoryTest.java | 23 ++ .../dnetlib/dhp/oa/provision/iris-odf-4.json | 1 + 4 files changed, 254 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/iris-odf-4.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ac0435ce21..bbb8e7d573 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1250,6 +1250,21 @@ class MappersTest { System.out.println("***************"); } + @Test + void testIRISPub() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("iris-odf.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + final Publication p = (Publication) list.get(0); + assertNotNull(p.getInstance().get(0).getUrl().get(0)); + assertValidId(p.getId()); + System.out.println(p.getInstance().get(0).getUrl()); + p.getPid().forEach(x -> System.out.println(x.getValue())); + p.getInstance().get(0).getAlternateIdentifier().forEach(x -> System.out.println(x.getValue())); + + } @Test void testNotWellFormed() throws IOException { final String xml = IOUtils diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml new file mode 100644 index 0000000000..1b875ec361 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml @@ -0,0 +1,215 @@ + + + + oai:air.unimi.it:2434/907506 + 2024-01-04T12:42:51Z + com_2434_73555 + col_2434_73557 + openaire + 2024-01-29T16:56:50.632Z + + od______1261::ff2d9e058e7bea90a27f41c31078e601 + oai:air.unimi.it:2434/907506 + + + + + od______1261 + + + + + Ensuring 
tests of conservation interventions build on existing literature + + + + W.J. Sutherland + + + S.T. Alvarez-Castaneda + + + T. Amano + + + R. Ambrosini + + + P. Atkinson + + + J.M. Baxter + + + A.L. Bond + + + P.J. Boon + + + K.L. Buchanan + + + J. Barlow + + + G. Bogliani + + + O.M. Bragg + + + M. Burgman + + + M.W. Cadotte + + + M. Calver + + + S.J. Cooke + + + R.T. Corlett + + + V. Devictor + + + J.G. Ewen + + + M. Fisher + + + G. Freeman + + + E. Game + + + B.J. Godley + + + C. Gortazar + + + I.R. Hartley + + + D.L. Hawksworth + + + K.A. Hobson + + + M.-. Lu + + + B. Martin-Lopez + + + K. Ma + + + A. Machado + + + D. Mae + + + M. Mangiacotti + + + D.J. Mccafferty + + + V. Melfi + + + S. Molur + + + A.J. Moore + + + S.D. Murphy + + + D. Norri + + + A.P.E. van Oudenhoven + + + J. Power + + + E.C. Ree + + + M.W. Schwartz + + + I. Storch + + + C. Wordley + + + + + + 2020 + 2020 + 2022-06-20 + + eng + Wiley Blackwell Publishing + journal article + application/pdf + 2434/907506 + open access + + Conservation of Natural Resources + + + + + 191802 bytes + + https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf + + 10.1111/cobi.13555 + 32779884 + https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf + 0001 + 2020-01-01 + OPEN + eng + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index de69795f88..ef6370bf3a 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -244,4 +244,27 @@ public class XmlRecordFactoryTest { } + @Test + public void testIrisGuidelines4() throws DocumentException, IOException { + final ContextMapper contextMapper = new ContextMapper(); + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final Publication p = OBJECT_MAPPER + .readValue( + IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")), + Publication.class); + + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + + assertNotNull(xml); + + final Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + System.out.println(doc.asXML()); + + } + } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/iris-odf-4.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/iris-odf-4.json new file mode 100644 index 0000000000..4c5ec97081 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/iris-odf-4.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|opendoar____::17326d10d511828f6b34fa6d751739e2","value":"Archivio Istituzionale della Ricerca dell'Università degli Studi di 
Milano","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1706638577436,"id":"50|od______1261::ff2d9e058e7bea90a27f41c31078e601","originalId":["oai:air.unimi.it:2434/907506","50|od______1261::ff2d9e058e7bea90a27f41c31078e601"],"pid":[{"value":"2434/907506","qualifier":{"classid":"handle","classname":"Handle","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"","dateoftransformation":"2024-01-29T16:56:50.632Z","extraInfo":[],"oaiprovenance":null,"measures":null,"processingchargeamount":null,"processingchargecurrency":null,"author":[{"fullname":"W.J. Sutherland","name":"W. J.","surname":"Sutherland","rank":1,"pid":[],"affiliation":[]},{"fullname":"S.T. Alvarez-Castaneda","name":"S. T.","surname":"Alvarez-Castaneda","rank":2,"pid":[],"affiliation":[]},{"fullname":"T. Amano","name":"T.","surname":"Amano","rank":3,"pid":[],"affiliation":[]},{"fullname":"R. Ambrosini","name":"R.","surname":"Ambrosini","rank":4,"pid":[],"affiliation":[]},{"fullname":"P. Atkinson","name":"P.","surname":"Atkinson","rank":5,"pid":[],"affiliation":[]},{"fullname":"J.M. Baxter","name":"J. M.","surname":"Baxter","rank":6,"pid":[],"affiliation":[]},{"fullname":"A.L. Bond","name":"A. L.","surname":"Bond","rank":7,"pid":[],"affiliation":[]},{"fullname":"P.J. Boon","name":"P. J.","surname":"Boon","rank":8,"pid":[],"affiliation":[]},{"fullname":"K.L. Buchanan","name":"K. L.","surname":"Buchanan","rank":9,"pid":[],"affiliation":[]},{"fullname":"J. Barlow","name":"J.","surname":"Barlow","rank":10,"pid":[],"affiliation":[]},{"fullname":"G. Bogliani","name":"G.","surname":"Bogliani","rank":11,"pid":[],"affiliation":[]},{"fullname":"O.M. Bragg","name":"O. M.","surname":"Bragg","rank":12,"pid":[],"affiliation":[]},{"fullname":"M. Burgman","name":"M.","surname":"Burgman","rank":13,"pid":[],"affiliation":[]},{"fullname":"M.W. Cadotte","name":"M. W.","surname":"Cadotte","rank":14,"pid":[],"affiliation":[]},{"fullname":"M. Calver","name":"M.","surname":"Calver","rank":15,"pid":[],"affiliation":[]},{"fullname":"S.J. Cooke","name":"S. J.","surname":"Cooke","rank":16,"pid":[],"affiliation":[]},{"fullname":"R.T. Corlett","name":"R. T.","surname":"Corlett","rank":17,"pid":[],"affiliation":[]},{"fullname":"V. Devictor","name":"V.","surname":"Devictor","rank":18,"pid":[],"affiliation":[]},{"fullname":"J.G. Ewen","name":"J. G.","surname":"Ewen","rank":19,"pid":[],"affiliation":[]},{"fullname":"M. Fisher","name":"M.","surname":"Fisher","rank":20,"pid":[],"affiliation":[]},{"fullname":"G. Freeman","name":"G.","surname":"Freeman","rank":21,"pid":[],"affiliation":[]},{"fullname":"E. Game","name":"E.","surname":"Game","rank":22,"pid":[],"affiliation":[]},{"fullname":"B.J. Godley","name":"B. J.","surname":"Godley","rank":23,"pid":[],"affiliation":[]},{"fullname":"C. Gortazar","name":"C.","surname":"Gortazar","rank":24,"pid":[],"affiliation":[]},{"fullname":"I.R. Hartley","name":"I. 
R.","surname":"Hartley","rank":25,"pid":[],"affiliation":[]},{"fullname":"D.L. Hawksworth","name":"D. L.","surname":"Hawksworth","rank":26,"pid":[],"affiliation":[]},{"fullname":"K.A. Hobson","name":"K. A.","surname":"Hobson","rank":27,"pid":[],"affiliation":[]},{"fullname":"M.-. Lu","name":"M. -.","surname":"Lu","rank":28,"pid":[],"affiliation":[]},{"fullname":"B. Martin-Lopez","name":"B.","surname":"Martin-Lopez","rank":29,"pid":[],"affiliation":[]},{"fullname":"K. Ma","name":"K.","surname":"Ma","rank":30,"pid":[],"affiliation":[]},{"fullname":"A. Machado","name":"A.","surname":"Machado","rank":31,"pid":[],"affiliation":[]},{"fullname":"D. Mae","name":"D.","surname":"Mae","rank":32,"pid":[],"affiliation":[]},{"fullname":"M. Mangiacotti","name":"M.","surname":"Mangiacotti","rank":33,"pid":[],"affiliation":[]},{"fullname":"D.J. Mccafferty","name":"D. J.","surname":"Mccafferty","rank":34,"pid":[],"affiliation":[]},{"fullname":"V. Melfi","name":"V.","surname":"Melfi","rank":35,"pid":[],"affiliation":[]},{"fullname":"S. Molur","name":"S.","surname":"Molur","rank":36,"pid":[],"affiliation":[]},{"fullname":"A.J. Moore","name":"A. J.","surname":"Moore","rank":37,"pid":[],"affiliation":[]},{"fullname":"S.D. Murphy","name":"S. D.","surname":"Murphy","rank":38,"pid":[],"affiliation":[]},{"fullname":"D. Norri","name":"D.","surname":"Norri","rank":39,"pid":[],"affiliation":[]},{"fullname":"A.P.E. van Oudenhoven","name":"A. P. E.","surname":"Oudenhoven","rank":40,"pid":[],"affiliation":[]},{"fullname":"J. Power","name":"J.","surname":"Power","rank":41,"pid":[],"affiliation":[]},{"fullname":"E.C. Ree","name":"E. C.","surname":"Ree","rank":42,"pid":[],"affiliation":[]},{"fullname":"M.W. Schwartz","name":"M. W.","surname":"Schwartz","rank":43,"pid":[],"affiliation":[]},{"fullname":"I. Storch","name":"I.","surname":"Storch","rank":44,"pid":[],"affiliation":[]},{"fullname":"C. 
Wordley","name":"C.","surname":"Wordley","rank":45,"pid":[],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"metaResourceType":null,"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Conservation of Natural Resources","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ensuring tests of conservation interventions build on existing literature","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2020","qualifier":{"classid":"Accepted","classname":"Accepted","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2020","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2022-06-20","qualifier":{"classid":"Available","classname":"Available","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[],"dateofacceptance":{"value":"2020-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Wiley Blackwell 
Publishing","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[{"value":"https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"format":[{"value":"application/pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":{"classid":"journal article","classname":"journal article","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"instanceTypeMapping":[{"originalType":"http://purl.org/coar/resource_type/c_6501","typeCode":null,"typeLabel":null,"vocabularyName":"openaire::coar_resource_types_3_1"}],"hostedby":{"key":"10|opendoar____::17326d10d511828f6b34fa6d751739e2","value":"Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano","dataInfo":null},"url":["https://hdl.handle.net/2434/907506","https://doi.org/10.1111/cobi.13555"],"distributionlocation":null,"collectedfrom":{"key":"10|opendoar____::17326d10d511828f6b34fa6d751739e2","value":"Archivio Istituzionale della Ricerca dell'Università degli Studi di 
Milano","dataInfo":null},"pid":[{"value":"2434/907506","qualifier":{"classid":"handle","classname":"Handle","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":[{"value":"32779884","qualifier":{"classid":"pmid","classname":"pmid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"10.1111/cobi.13555","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2020-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"","classname":"","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null,"fulltext":"https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf"}],"eoscifguidelines":[],"openAccessColor":null,"publiclyFunded":null,"journal":null,"isGreen":null,"isInDiamondJournal":null} From 42f550630667f5d3209edf5d55e72a8b3f3bdff7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 09:44:56 +0200 Subject: [PATCH 51/56] [orcid enrichment] fixed directory cleanup before distcp --- .../eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml index bbd3581c57..3493ecb2fa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml @@ -49,10 +49,10 @@ - - - - + + + + From bb82052c4078657e23d47e210a0c9ad4e2aaec4d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 14:59:06 +0200 Subject: [PATCH 52/56] [graph cleaning] rule out datasources without an officialname --- .../dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 0124e96fc1..f01f90fe4f 100644 --- 
a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -312,7 +312,8 @@ public class GraphCleaningFunctions extends CleaningFunctions { } if (value instanceof Datasource) { - // nothing to evaluate here + final Datasource d = (Datasource) value; + return Objects.nonNull(d.getOfficialname()) && StringUtils.isNotBlank(d.getOfficialname().getValue()); } else if (value instanceof Project) { final Project p = (Project) value; return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
From 009dcf6aea063ff3ebd16967b08791a6b5e1c812 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 16:43:40 +0200 Subject: [PATCH 53/56] [actionsets] introduced support for the PromoteAction strategy --- .../actionmanager/promote/PromoteAction.java | 39 +++++++++++++++++++ .../PromoteActionPayloadForGraphTableJob.java | 29 ++++++++++---- .../PromoteActionPayloadFunctions.java | 4 +- ...load_for_graph_table_input_parameters.json | 6 +++ .../wf/dataset/oozie_app/workflow.xml | 2 + .../wf/datasource/oozie_app/workflow.xml | 1 + .../wf/organization/oozie_app/workflow.xml | 1 + .../oozie_app/workflow.xml | 2 + .../wf/project/oozie_app/workflow.xml | 1 + .../wf/publication/oozie_app/workflow.xml | 2 + .../wf/relation/oozie_app/workflow.xml | 1 + .../wf/software/oozie_app/workflow.xml | 2 + .../PromoteActionPayloadFunctionsTest.java | 4 +- 13 files changed, 84 insertions(+), 10 deletions(-) create mode 100644 dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java
diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java new file mode 100644 index 0000000000..163a8708e2 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024. + * SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package eu.dnetlib.dhp.actionmanager.promote; + +/** Encodes the Actionset promotion strategies */ +public class PromoteAction { + + /** The supported actionset promotion strategies + * + * ENRICH: promotes only the records in the actionset matching another record in the + * graph, and enriches them by applying the given MergeAndGet strategy + * UPSERT: promotes all the records in an actionset; matching records are updated + * using the given MergeAndGet strategy, while the non-matching records are inserted as they are. + */ + public enum Strategy { + ENRICH, UPSERT + } + + /** + * Returns the string representation of the join type implementing the given PromoteAction.Strategy.
+ * + * @param strategy the strategy to be used to promote the Actionset contents + * @return the join type used to implement the promotion strategy + */ + public static String joinTypeForStrategy(PromoteAction.Strategy strategy) { + switch (strategy) { + case ENRICH: + return "join"; + case UPSERT: + return "full_outer"; + default: + throw new IllegalStateException("unsupported PromoteAction: " + strategy.toString()); + } + } +} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7b024bea8b..56cbda4d67 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -67,8 +67,9 @@ public class PromoteActionPayloadForGraphTableJob { String outputGraphTablePath = parser.get("outputGraphTablePath"); logger.info("outputGraphTablePath: {}", outputGraphTablePath); - MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase()); - logger.info("strategy: {}", strategy); + MergeAndGet.Strategy mergeAndGetStrategy = MergeAndGet.Strategy + .valueOf(parser.get("mergeAndGetStrategy").toUpperCase()); + logger.info("mergeAndGetStrategy: {}", mergeAndGetStrategy); Boolean shouldGroupById = Optional .ofNullable(parser.get("shouldGroupById")) @@ -76,6 +77,12 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + PromoteAction.Strategy promoteActionStrategy = Optional + .ofNullable(parser.get("promoteActionStrategy")) + .map(PromoteAction.Strategy::valueOf) + .orElse(PromoteAction.Strategy.UPSERT); + logger.info("promoteActionStrategy: {}", promoteActionStrategy); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); @SuppressWarnings("unchecked") @@ -97,7 +104,8 @@ public class PromoteActionPayloadForGraphTableJob { inputGraphTablePath, inputActionPayloadPath, outputGraphTablePath, - strategy, + mergeAndGetStrategy, + promoteActionStrategy, rowClazz, actionPayloadClazz, shouldGroupById); @@ -124,14 +132,16 @@ public class PromoteActionPayloadForGraphTableJob { String inputGraphTablePath, String inputActionPayloadPath, String outputGraphTablePath, - MergeAndGet.Strategy strategy, + MergeAndGet.Strategy mergeAndGetStrategy, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz, Boolean shouldGroupById) { Dataset rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz); Dataset actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz); Dataset result = promoteActionPayloadForGraphTable( - rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById) + rowDS, actionPayloadDS, mergeAndGetStrategy, promoteActionStrategy, rowClazz, actionPayloadClazz, + shouldGroupById) .map((MapFunction) value -> value, Encoders.bean(rowClazz)); saveGraphTable(result, outputGraphTablePath); @@ -183,7 +193,8 @@ public class PromoteActionPayloadForGraphTableJob { private static Dataset promoteActionPayloadForGraphTable( Dataset rowDS, Dataset actionPayloadDS, - MergeAndGet.Strategy strategy, + MergeAndGet.Strategy mergeAndGetStrategy, + PromoteAction.Strategy 
promoteActionStrategy, Class rowClazz, Class actionPayloadClazz, Boolean shouldGroupById) { @@ -195,8 +206,9 @@ public class PromoteActionPayloadForGraphTableJob { SerializableSupplier> rowIdFn = ModelSupport::idFn; SerializableSupplier> actionPayloadIdFn = ModelSupport::idFn; - SerializableSupplier> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy); - SerializableSupplier> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy); + SerializableSupplier> mergeRowWithActionPayloadAndGetFn = MergeAndGet + .functionFor(mergeAndGetStrategy); + SerializableSupplier> mergeRowsAndGetFn = MergeAndGet.functionFor(mergeAndGetStrategy); SerializableSupplier zeroFn = zeroFn(rowClazz); SerializableSupplier> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget; @@ -207,6 +219,7 @@ public class PromoteActionPayloadForGraphTableJob { rowIdFn, actionPayloadIdFn, mergeRowWithActionPayloadAndGetFn, + promoteActionStrategy, rowClazz, actionPayloadClazz); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java index d799c646bc..f0b094240e 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java @@ -34,6 +34,7 @@ public class PromoteActionPayloadFunctions { * @param rowIdFn Function used to get the id of graph table row * @param actionPayloadIdFn Function used to get id of action payload instance * @param mergeAndGetFn Function used to merge graph table row and action payload instance + * @param promoteActionStrategy the Actionset promotion strategy * @param rowClazz Class of graph table * @param actionPayloadClazz Class of action payload * @param Type of graph table row @@ -46,6 +47,7 @@ public class PromoteActionPayloadFunctions { SerializableSupplier> rowIdFn, SerializableSupplier> actionPayloadIdFn, SerializableSupplier> mergeAndGetFn, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz) { if (!isSubClass(rowClazz, actionPayloadClazz)) { @@ -61,7 +63,7 @@ public class PromoteActionPayloadFunctions { .joinWith( actionPayloadWithIdDS, rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")), - "full_outer") + PromoteAction.joinTypeForStrategy(promoteActionStrategy)) .map( (MapFunction, Tuple2>, G>) value -> { Optional rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2); diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json index 00c9404ef0..81a7c77d7b 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json @@ -41,6 +41,12 @@ "paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET", "paramRequired": true }, + { + "paramName": "pas", + 
"paramLongName": "promoteActionStrategy", + "paramDescription": "strategy for promoting the actionset contents into the graph tables, ENRICH or UPSERT (default)", + "paramRequired": false + }, { "paramName": "sgid", "paramLongName": "shouldGroupById", diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml index 4f374a75a0..5401b45cac 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml @@ -115,6 +115,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Dataset --outputGraphTablePath${workingDir}/dataset --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -167,6 +168,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/dataset --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml index c85ba4ac1c..f9bd66ae3a 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Datasource --outputGraphTablePath${outputGraphRootPath}/datasource --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml index 412cad70bc..ebfdeee312 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Organization --outputGraphTablePath${outputGraphRootPath}/organization --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml index 7bac760e2c..02399ed9b9 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml @@ -114,6 +114,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputGraphTablePath${workingDir}/otherresearchproduct 
--mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -166,6 +167,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/otherresearchproduct --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml index daf48e9d78..57c2357b4a 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Project --outputGraphTablePath${outputGraphRootPath}/project --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml index b76dc82f14..92b114776a 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml @@ -115,6 +115,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Publication --outputGraphTablePath${workingDir}/publication --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -167,6 +168,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/publication --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml index d3086dbdc0..e9e5f0b454 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml @@ -107,6 +107,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Relation --outputGraphTablePath${outputGraphRootPath}/relation --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml index b5673b18f4..1d36ddf94b 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml @@ -114,6 +114,7 @@ 
--actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Software --outputGraphTablePath${workingDir}/software --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -166,6 +167,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/software --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById}
diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index cbc1bfaba7..777e2fa1ce 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -54,7 +54,7 @@ public class PromoteActionPayloadFunctionsTest { RuntimeException.class, () -> PromoteActionPayloadFunctions .joinGraphTableWithActionPayloadAndMerge( - null, null, null, null, null, OafImplSubSub.class, OafImpl.class)); + null, null, null, null, null, null, OafImplSubSub.class, OafImpl.class)); } @Test @@ -104,6 +104,7 @@ rowIdFn, actionPayloadIdFn, mergeAndGetFn, + PromoteAction.Strategy.UPSERT, OafImplSubSub.class, OafImplSubSub.class) .collectAsList(); @@ -183,6 +184,7 @@ rowIdFn, actionPayloadIdFn, mergeAndGetFn, + PromoteAction.Strategy.UPSERT, OafImplSubSub.class, OafImplSub.class) .collectAsList();
From fd17c1f17c4470be0e45dfdf3c13255087ce8e59 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Feb 2024 16:55:36 +0200 Subject: [PATCH 54/56] [actionsets] fixed join type --- .../eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java index 163a8708e2..8fb9c8c953 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java @@ -29,7 +29,7 @@ public class PromoteAction { public static String joinTypeForStrategy(PromoteAction.Strategy strategy) { switch (strategy) { case ENRICH: - return "join"; + return "left_outer"; case UPSERT: return "full_outer"; default:
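A minimal, self-contained sketch of how the join type selected by joinTypeForStrategy() drives the promotion semantics introduced in PATCH 53 and corrected in PATCH 54. This is an editorial illustration, not part of the patch series; the class name, ids, and payload strings below are invented:

import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public class PromoteJoinSemanticsSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local").appName("sketch").getOrCreate();

        // graph table rows and action payloads, both keyed by entity id (column "_1")
        Dataset<Tuple2<String, String>> graph = spark
            .createDataset(
                Arrays.asList(new Tuple2<>("id1", "graph record"), new Tuple2<>("id2", "graph record")),
                Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
        Dataset<Tuple2<String, String>> payload = spark
            .createDataset(
                Arrays.asList(new Tuple2<>("id2", "payload"), new Tuple2<>("id3", "payload")),
                Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

        // UPSERT -> "full_outer": id1 survives untouched, id2 gets merged, id3 is inserted as-is
        graph.joinWith(payload, graph.col("_1").equalTo(payload.col("_1")), "full_outer").show();

        // ENRICH -> "left_outer": id1 survives untouched, id2 gets merged, id3 is dropped
        graph.joinWith(payload, graph.col("_1").equalTo(payload.col("_1")), "left_outer").show();

        spark.stop();
    }
}

The one-line fix in PATCH 54 matters because the inner join ("join") originally returned for ENRICH would have dropped every graph record without a matching payload; "left_outer" keeps them unchanged, which is what the Javadoc introduced in PATCH 53 promises.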
From b19643f6eb0d0f7bf798ffbff0427a6c2c9671e0 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Thu, 8 Feb 2024 15:12:16 +0100 Subject: [PATCH 55/56] Dedup aliases, created when a dedup record from a previous build has been merged into a new dedup record, need to be marked as "deletedbyinference", since they are "merged" into the new dedup --- .../dhp/oa/dedup/DedupRecordFactory.java | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-)
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index eddfba309d..d5b106c81b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -122,22 +122,41 @@ public class DedupRecordFactory { } return Stream - .concat(Stream.of(agg.getDedupId()), agg.aliases.stream()) - .map(id -> { - try { - OafEntity res = (OafEntity) BeanUtils.cloneBean(agg.entity); - res.setId(id); - res.setDataInfo(dataInfo); - res.setLastupdatetimestamp(ts); - return res; - } catch (Exception e) { - throw new RuntimeException(e); - } - }) + .concat( + Stream + .of(agg.getDedupId()) + .map(id -> createDedupOafEntity(id, agg.entity, dataInfo, ts)), + agg.aliases + .stream() + .map(id -> createMergedDedupAliasOafEntity(id, agg.entity, dataInfo, ts))) .iterator(); }, beanEncoder); } + private static OafEntity createDedupOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(base); + res.setId(id); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static OafEntity createMergedDedupAliasOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) { + try { + OafEntity res = createDedupOafEntity(id, base, dataInfo, ts); + DataInfo ds = (DataInfo) BeanUtils.cloneBean(dataInfo); + ds.setDeletedbyinference(true); + res.setDataInfo(ds); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + private static OafEntity reduceEntity(OafEntity entity, OafEntity duplicate) { if (duplicate == null) {
From d85d2df6ad989abe32a3ca3b509c2c264e8d3553 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 9 Feb 2024 10:19:53 +0100 Subject: [PATCH 56/56] [graph raw] fixed mapping of the original resource type from the Datacite format --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 22 +++++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 32 +++++++++- .../dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml | 59 +++++++++++++++++++ 3 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 08529125c8..57e0d29555 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -238,11 +238,23 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { (Element) doc .selectSingleNode( "//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']")) - .map(element -> { - final String resourceTypeURI = element.attributeValue("uri"); - final String resourceTypeAnyURI = element.attributeValue("anyURI"); - final String resourceTypeTxt = element.getText(); - final String resourceTypeGeneral = element.attributeValue("resourceTypeGeneral"); + .map(e -> { + final String resourceTypeURI = Optional + .ofNullable(e.attributeValue("uri")) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeAnyURI = Optional + .ofNullable(e.attributeValue("anyURI")) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeTxt = Optional + .ofNullable(e.getText())
.filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeGeneral = Optional + .ofNullable(e.attributeValue("resourceTypeGeneral")) + .filter(StringUtils::isNotBlank) + .orElse(null); return ObjectUtils .firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index bbb8e7d573..a6bbd30cff 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1171,6 +1171,34 @@ class MappersTest { } + @Test + void test_Zenodo2() throws IOException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo2.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + assertEquals(3, list.size()); + Publication p = cleanup((Publication) list.get(0), vocs); + + assertNotNull(p.getInstance()); + assertEquals(1, p.getInstance().size()); + + final Instance instance = p.getInstance().get(0); + + assertNotNull(instance.getInstanceTypeMapping()); + assertEquals(1, instance.getInstanceTypeMapping().size()); + + Optional coarType = instance + .getInstanceTypeMapping() + .stream() + .filter(itm -> ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName())) + .findFirst(); + + assertTrue(coarType.isPresent()); + assertNotNull(coarType.get().getOriginalType()); + assertNull(coarType.get().getTypeCode()); + assertNull(coarType.get().getTypeLabel()); + } + @Test void testROHub2() throws IOException { final String xml = IOUtils @@ -1229,7 +1257,7 @@ class MappersTest { } @Test - public void testD4ScienceTraining() throws IOException { + void testD4ScienceTraining() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -1240,7 +1268,7 @@ class MappersTest { } @Test - public void testD4ScienceDataset() throws IOException { + void testD4ScienceDataset() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml new file mode 100644 index 0000000000..ebe105de86 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml @@ -0,0 +1,59 @@ + + +
+ oai:zenodo.org:1596086 + 2020-01-20T13:50:28Z + openaire + 2024-02-08T11:03:10.994Z + od______2659::036d5555a6688ed00c8d0da97bdece3b + 2024-02-08T11:03:10.994Z + 2024-02-08T11:03:10.994Z +
+ + + https://zenodo.org/record/1596086 + + + + Bonney, T. G. + T. G. + Bonney + + + + Ice Blocks on a Moraine + + Zenodo + 1889 + + 1889-08-22 + + + + 10.1038/040391a0 + + + Creative Commons Zero v1.0 Universal + Open Access + + + n/a + + + 0001 + 1889-08-22 + OPEN + http://creativecommons.org/publicdomain/zero/1.0/legalcode + + + + +
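To see concretely what PATCH 56 fixes: ObjectUtils.firstNonNull(...) returns the first non-null candidate, so a resourceType element carrying an empty uri="" attribute used to shadow the element text. A minimal sketch of the failure mode and of the Optional-based normalisation (editorial illustration, not part of the patch; it only assumes org.apache.commons.lang3 on the classpath, and the attribute/text values are invented):

import java.util.Optional;

import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;

public class ResourceTypeFallbackSketch {

    public static void main(String[] args) {
        // e.g. a record declaring <resourceType uri="">journal article</resourceType>
        String uriAttribute = "";
        String elementText = "journal article";

        // before the fix: the blank attribute is non-null, so it wins the fallback chain
        System.out.println(ObjectUtils.firstNonNull(uriAttribute, elementText)); // prints an empty string

        // after the fix: blank values are normalised to null before the fallback chain runs
        String uri = Optional
            .ofNullable(uriAttribute)
            .filter(StringUtils::isNotBlank)
            .orElse(null);
        System.out.println(ObjectUtils.firstNonNull(uri, elementText)); // prints "journal article"
    }
}

This is consistent with the assertions in the test_Zenodo2 case added above: the mapped instanceTypeMapping keeps a non-null originalType while typeCode and typeLabel stay null.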