From 06543434797f723e9b26ee2161a2f6d1a13bbc3b Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Thu, 5 Jan 2023 10:36:32 +0200 Subject: [PATCH] Added usage-stats-promote --- .../usagerawdata/export/PiwikStatsDB.java | 38 ++++++------ .../export/UsageStatsExporter.java | 2 +- dhp-workflows/dhp-usage-stats-promote/pom.xml | 32 ++++++++++ .../dhp-usage-stats-promote/runworkflow.sh | 1 + .../oozie_app/config-default.xml | 30 ++++++++++ .../oozie_app/updateProductionViews.sh | 16 +++++ .../usagestatspromote/oozie_app/workflow.xml | 59 +++++++++++++++++++ 7 files changed, 158 insertions(+), 20 deletions(-) create mode 100755 dhp-workflows/dhp-usage-stats-promote/pom.xml create mode 100755 dhp-workflows/dhp-usage-stats-promote/runworkflow.sh create mode 100755 dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/config-default.xml create mode 100755 dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/updateProductionViews.sh create mode 100755 dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/PiwikStatsDB.java index b320b3007..747bf55a1 100755 --- a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/PiwikStatsDB.java +++ b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/PiwikStatsDB.java @@ -152,25 +152,25 @@ public class PiwikStatsDB { ReadCounterRobotsList counterRobots = new ReadCounterRobotsList(this.getCounterRobotsURL()); this.robotsList = counterRobots.getRobotsPatterns(); -// logger.info("Processing repository logs"); -// processRepositoryLog(); -// logger.info("Repository logs 
process done"); -// -// logger.info("Removing double clicks"); -// removeDoubleClicks(); -// logger.info("Removing double clicks done"); -// -// logger.info("Cleaning oai"); -// cleanOAI(); -// logger.info("Cleaning oai done"); -// -// logger.info("Processing portal logs"); -// processPortalLog(); -// logger.info("Portal logs process done"); -// -// logger.info("Processing portal usagestats"); -// portalLogs(); -// logger.info("Portal usagestats process done"); + logger.info("Processing repository logs"); + processRepositoryLog(); + logger.info("Repository logs process done"); + + logger.info("Removing double clicks"); + removeDoubleClicks(); + logger.info("Removing double clicks done"); + + logger.info("Cleaning oai"); + cleanOAI(); + logger.info("Cleaning oai done"); + + logger.info("Processing portal logs"); + processPortalLog(); + logger.info("Portal logs process done"); + + logger.info("Processing portal usagestats"); + portalLogs(); + logger.info("Portal usagestats process done"); logger.info("Process Episciences"); processEpisciencesLog(); diff --git a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/UsageStatsExporter.java index 33fad50bd..58909ef4e 100755 --- a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/UsageStatsExporter.java +++ b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/UsageStatsExporter.java @@ -108,7 +108,7 @@ public class UsageStatsExporter { logger.info("LaReferencia logs done"); } - IrusStats irusstats = new IrusStats(ExecuteWorkflow.irusUKBaseURL); + IrusStats irusstats = new IrusStats(ExecuteWorkflow.irusUKBaseURL_R5); if (ExecuteWorkflow.irusCreateTablesEmptyDirs) { logger.info("Creating Irus Stats tables"); irusstats.createTables(); diff --git 
a/dhp-workflows/dhp-usage-stats-promote/pom.xml b/dhp-workflows/dhp-usage-stats-promote/pom.xml new file mode 100755 index 000000000..b671e618c --- /dev/null +++ b/dhp-workflows/dhp-usage-stats-promote/pom.xml @@ -0,0 +1,32 @@ + + + + dhp-workflows + eu.dnetlib.dhp + 1.2.4-SNAPSHOT + + 4.0.0 + dhp-usage-stats-promote + + + org.apache.spark + spark-core_2.11 + + + org.apache.spark + spark-sql_2.11 + + + + + + pl.project13.maven + git-commit-id-plugin + 2.1.11 + + false + + + + + diff --git a/dhp-workflows/dhp-usage-stats-promote/runworkflow.sh b/dhp-workflows/dhp-usage-stats-promote/runworkflow.sh new file mode 100755 index 000000000..2e9b7a163 --- /dev/null +++ b/dhp-workflows/dhp-usage-stats-promote/runworkflow.sh @@ -0,0 +1 @@ +mvn clean package -Poozie-package,deploy,run -Dworkflow.source.dir=eu/dnetlib/dhp/oa/graph/usagestatspromote \ No newline at end of file diff --git a/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/config-default.xml b/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/config-default.xml new file mode 100755 index 000000000..3fa00d053 --- /dev/null +++ b/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + ${jobTracker} + + + nameNode + ${nameNode} + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hive_jdbc_url + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228 + + + oozie.wf.workflow.notification.url + {serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status + + \ No newline at end of file diff --git 
a/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/updateProductionViews.sh b/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/updateProductionViews.sh
new file mode 100755
index 000000000..3e510e87e
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/updateProductionViews.sh
@@ -0,0 +1,16 @@
+export PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"  # egg cache under the current user's home
+export link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"  # per-user path that must be a symlink
+if ! [ -L "$link_folder" ]  # not a symlink yet: replace whatever is there with one
+then
+    rm -Rf "$link_folder"
+    ln -sfn "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
+fi
+# Usage: updateProductionViews.sh <source_db> <production_db>; ${N:?msg} aborts if an argument is missing or empty.
+export SOURCE="${1:?missing source database name (argument 1)}"
+export PRODUCTION="${2:?missing production database name (argument 2)}"
+# Drop every entry listed by "show tables" in PRODUCTION as a view, then create one view per SOURCE table.
+echo "Updating ${PRODUCTION} database"
+impala-shell -q "create database if not exists ${PRODUCTION}"
+impala-shell -d "${PRODUCTION}" -q "show tables" --delimited | sed "s/^/drop view if exists ${PRODUCTION}./" | sed "s/$/;/" | impala-shell -c -f -
+impala-shell -d "${SOURCE}" -q "show tables" --delimited | sed "s/\(.*\)/create view ${PRODUCTION}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
+echo "Production db ready!"
\ No newline at end of file diff --git a/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/workflow.xml new file mode 100755 index 000000000..93bad4000 --- /dev/null +++ b/dhp-workflows/dhp-usage-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatspromote/oozie_app/workflow.xml @@ -0,0 +1,59 @@ + + + + usage_stats_db_name + the target usage stats database name + + + usage_stats_db_production_name + the name of the public production usage stats database + + + hive_metastore_uris + hive server metastore URIs + + + hive_jdbc_url + hive server jdbc url + + + hive_timeout + the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds. + + + + + ${jobTracker} + ${nameNode} + + + hive.metastore.uris + ${hive_metastore_uris} + + + hive.txn.timeout + ${hive_timeout} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${jobTracker} + ${nameNode} + updateProductionViews.sh + ${usage_stats_db_name} + ${usage_stats_db_production_name} + updateProductionViews.sh + + + + + + \ No newline at end of file