From a3948c1f6eeb958cb66a5fe212b8055c33a4ccff Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 18 Jun 2021 15:14:08 +0200 Subject: [PATCH] cleanup old doiboost workflows --- .../dhp/doiboost/oozie_app/config-default.xml | 42 -- .../dhp/doiboost/oozie_app/workflow.xml | 372 ------------------ 2 files changed, 414 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml deleted file mode 100644 index 508202e30..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml deleted file mode 100644 index 77aa595f5..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ /dev/null @@ -1,372 +0,0 @@ - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorIntersectionMemory - memory for individual executor - - - - sparkExecutorCores - number of cores used by single executor - - - - - - workingPath - the working Path - - - - hostedByMapPath - the hostedByMap Path - - - outputPath - the Path of the sequence file action set - - - - - - inputPathCrossref - the Crossref input path - - - crossrefTimestamp - Timestamp for the Crossref incremental Harvesting - - - esServer - elasticsearch server url for the Crossref Harvesting - - - esIndex - elasticsearch index name for the Crossref Harvesting - - - - - MAGDumpPath - the MAG dump working path - - - - inputPathMAG - the MAG working path - - - - - - inputPathUnpayWall - the UnpayWall working path - - - - - inputPathOrcid - the ORCID input path - - - - workingPathOrcid - the ORCID working path - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - ${wf:conf('resumeFrom') eq 'ConvertCrossrefToOAF'} - ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} - ${wf:conf('resumeFrom') eq 'PreprocessMag'} - ${wf:conf('resumeFrom') eq 'PreprocessUW'} - ${wf:conf('resumeFrom') eq 'PreprocessORCID'} - ${wf:conf('resumeFrom') eq 'CreateDOIBoost'} - ${wf:conf('resumeFrom') eq 'GenerateActionSet'} - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - eu.dnetlib.doiboost.crossref.CrossrefImporter - --targetPath${inputPathCrossref}/index_update - --namenode${nameNode} - --esServer${esServer} - --esIndex${esIndex} - --timestamp${crossrefTimestamp} - - - - - - - - - - - yarn-cluster - cluster - GenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.CrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --workingPath${inputPathCrossref} - --masteryarn-cluster - - - - - - - - - - - - - - - - - - - - - - - - - - - - - yarn-cluster - cluster - Convert Mag to Dataset - eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${MAGDumpPath} - --targetPath${inputPathMAG}/dataset - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - ConvertCrossrefToOAF - eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${workingPath} - --masteryarn-cluster - - - - - - - - yarn-cluster - cluster - Convert Mag to OAF Dataset - eu.dnetlib.doiboost.mag.SparkProcessMAG - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorIntersectionMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathMAG}/dataset - --workingPath${inputPathMAG}/process - --targetPath${workingPath} - --masteryarn-cluster - - - - - - - - - - yarn-cluster - cluster - Convert UnpayWall to Dataset - eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathUnpayWall}/uw_extracted - --targetPath${workingPath}/uwPublication - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Convert ORCID to Dataset - eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathOrcid} - --workingPath${workingPathOrcid} - --targetPath${workingPath}/orcidPublication - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Create DOIBoost Infospace - eu.dnetlib.doiboost.SparkGenerateDoiBoost - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorIntersectionMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --hostedByMapPath${hostedByMapPath} - --affiliationPath${inputPathMAG}/dataset/Affiliations - --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations - --workingPath${workingPath} - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Generate DOIBoost ActionSet - eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --dbPublicationPath${workingPath}/doiBoostPublicationFiltered - --dbDatasetPath${workingPath}/crossrefDataset - --crossRefRelation${workingPath}/crossrefRelation - --dbaffiliationRelationPath${workingPath}/doiBoostPublicationAffiliation - --dbOrganizationPath${workingPath}/doiBoostOrganization - --targetPath${workingPath}/actionDataSet - --sFilePath${outputPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file