diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ExecuteWorkflow.java
deleted file mode 100755
index a05424f2a..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ExecuteWorkflow.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package eu.dnetlib.oa.graph.usagestatsbuild.export;
-
-import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.Date;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.log4j.BasicConfigurator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-
-/**
- * @author D. Pierrakos, S. Zoupanos
- */
-public class ExecuteWorkflow {
-
-// static String matomoAuthToken;
- static String portalMatomoID;
-// static String irusUKBaseURL;
-// static String lareferenciaBaseURL;
-// static String lareferenciaAuthToken;
- static String dbHiveUrl;
- static String dbImpalaUrl;
- static String usageRawDataDBSchema;
- static String usageStatsDBSchema;
- static String usagestatsPermanentDBSchema;
- static String statsDBSchema;
- static boolean recreateDbAndTables;
-
- static boolean processPiwikLogs;
- static boolean processLaReferenciaLogs;
-
- static boolean irusProcessStats;
-
- static boolean sarcProcessStats;
-
- static boolean finalizeStats;
- static boolean finalTablesVisibleToImpala;
-
- static int numberOfDownloadThreads;
-
- private static final Logger logger = LoggerFactory.getLogger(PiwikStatsDB.class);
-
- public static void main(String args[]) throws Exception {
-
- // Sending the logs to the console
- BasicConfigurator.configure();
-
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(
- IOUtils
- .toString(
- UsageStatsExporter.class
- .getResourceAsStream(
- "/eu/dnetlib/dhp/oa/graph/usagestatsbuild/export/usagestatsbuild_parameters.json")));
- parser.parseArgument(args);
-
- // Setting up the initial parameters
-// matomoAuthToken = parser.get("matomoAuthToken");
-// matomoBaseURL = parser.get("matomoBaseURL");
- portalMatomoID = parser.get("portalMatomoID");
-// irusUKBaseURL = parser.get("irusUKBaseURL");
-// lareferenciaBaseURL = parser.get("lareferenciaBaseURL");
-// lareferenciaAuthToken = parser.get("lareferenciaAuthToken");
-
- dbHiveUrl = parser.get("dbHiveUrl");
- dbImpalaUrl = parser.get("dbImpalaUrl");
- usageRawDataDBSchema = parser.get("usageRawDataDBSchema");
- usageStatsDBSchema = parser.get("usageStatsDBSchema");
- usagestatsPermanentDBSchema = parser.get("usagestatsPermanentDBSchema");
- statsDBSchema = parser.get("statsDBSchema");
-
- if (parser.get("processPiwikLogs").toLowerCase().equals("true")) {
- processPiwikLogs = true;
- } else {
- processPiwikLogs = false;
- }
-
-// String startingLogPeriodStr = parser.get("startingLogPeriod");
-// Date startingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(startingLogPeriodStr);
-// startingLogPeriod = startingLogPeriodStr(startingLogPeriodDate);
-//
-// String endingLogPeriodStr = parser.get("endingLogPeriod");
-// Date endingLogPeriodDate = new SimpleDateFormat("MM/yyyy").parse(endingLogPeriodStr);
-// endingLogPeriod = startingLogPeriodStr(endingLogPeriodDate);
-
- if (parser.get("recreateDbAndTables").toLowerCase().equals("true")) {
- recreateDbAndTables = true;
- } else {
- recreateDbAndTables = false;
- }
-
- if (parser.get("processLaReferenciaLogs").toLowerCase().equals("true")) {
- processLaReferenciaLogs = true;
- } else {
- processLaReferenciaLogs = false;
- }
-
- if (parser.get("irusProcessStats").toLowerCase().equals("true")) {
- irusProcessStats = true;
- } else {
- irusProcessStats = false;
- }
-
- if (parser.get("sarcProcessStats").toLowerCase().equals("true")) {
- sarcProcessStats = true;
- } else {
- sarcProcessStats = false;
- }
-
- if (parser.get("finalizeStats").toLowerCase().equals("true")) {
- finalizeStats = true;
- } else {
- finalizeStats = false;
- }
- if (parser.get("finalTablesVisibleToImpala").toLowerCase().equals("true")) {
- finalTablesVisibleToImpala = true;
- } else {
- numberOfDownloadThreads = Integer.parseInt(parser.get("numberOfDownloadThreads"));
- }
-
- UsageStatsExporter usagestatsExport = new UsageStatsExporter();
- usagestatsExport.export();
- }
-
- private static Calendar startingLogPeriodStr(Date date) {
-
- Calendar calendar = Calendar.getInstance();
- calendar.setTime(date);
- return calendar;
-
- }
-}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/IrusStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/IrusStats.java
deleted file mode 100755
index 831a8dde1..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/IrusStats.java
+++ /dev/null
@@ -1,95 +0,0 @@
-
-package eu.dnetlib.oa.graph.usagestatsbuild.export;
-
-import java.io.*;
-import java.net.URL;
-import java.net.URLConnection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.Statement;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.json.simple.JSONArray;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * @author D. Pierrakos, S. Zoupanos
- */
-public class IrusStats {
-
- private String irusUKURL;
-
- private static final Logger logger = LoggerFactory.getLogger(IrusStats.class);
-
- public IrusStats() throws Exception {
- }
-
- public void processIrusStats() throws Exception {
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Creating irus_downloads_stats_tmp table");
- String createDownloadsStats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".irus_downloads_stats_tmp "
- + "(`source` string, "
- + "`repository_id` string, "
- + "`result_id` string, "
- + "`date` string, "
- + "`count` bigint, "
- + "`openaire` bigint)";
- stmt.executeUpdate(createDownloadsStats);
- logger.info("Created irus_downloads_stats_tmp table");
-
- logger.info("Inserting into irus_downloads_stats_tmp");
- String insertDStats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".irus_downloads_stats_tmp "
- + "SELECT s.source, d.id AS repository_id, "
- + "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, s.count, '0' "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".sushilog s, "
- + ConnectDB.getStatsDBSchema() + ".datasource_oids d, "
- + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE s.repository=d.oid AND s.rid=ro.oid AND metric_type='ft_total' AND s.source='IRUS-UK'";
- stmt.executeUpdate(insertDStats);
- logger.info("Inserted into irus_downloads_stats_tmp");
-
- String createR5Stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".irus_R5_stats_tmp "
- + "(`source` string, "
- + "`repository_id` string, "
- + "`result_id` string, "
- + "`date` string, "
- + "`views` bigint, "
- + "`downloads` bigint, "
- + "`openaire` bigint)";
- stmt.executeUpdate(createR5Stats);
- logger.info("Created irus_R5_stats_tmp table");
-
- logger.info("Inserting into irus_R5_stats_tmp");
- String insertĪ”5Stats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".irus_R5_stats_tmp "
- + "SELECT s.source, d.id AS repository_id, "
- + "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, "
- + "(s.total_item_investigations-s.total_item_requests) as views, s.total_item_requests as downloads, '0' "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".sushilog_cop_r5 s, "
- + ConnectDB.getStatsDBSchema() + ".datasource_oids d, "
- + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK'";
- stmt.executeUpdate(insertĪ”5Stats);
- logger.info("Inserted into irus_R5_stats_tmp");
-
- stmt.close();
- // ConnectDB.getHiveConnection().close();
- }
- //// to add create table sushilog_cop_r5 as select * from openaire_prod_usage_raw.sushilog_cop_r5
- //// to add create table sushilog_cop_r5 as select * from openaire_prod_usage_raw.sushilog_cop_r5
-
-}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/LaReferenciaStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/LaReferenciaStats.java
deleted file mode 100755
index 60c4afb30..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/LaReferenciaStats.java
+++ /dev/null
@@ -1,321 +0,0 @@
-
-package eu.dnetlib.oa.graph.usagestatsbuild.export;
-
-import java.io.*;
-import java.net.URLDecoder;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.sql.Timestamp;
-import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-import org.json.simple.JSONArray;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * @author D. Pierrakos, S. Zoupanos
- */
-public class LaReferenciaStats {
-
- private static final Logger logger = LoggerFactory.getLogger(LaReferenciaStats.class);
-
- private String logRepoPath;
-
- private Statement stmt = null;
-
- private String CounterRobotsURL;
- private ArrayList robotsList;
-
- public LaReferenciaStats() throws Exception {
- }
-
- public void processLogs() throws Exception {
- try {
- logger.info("LaReferencia creating viewsStats");
- viewsStats();
- logger.info("LaReferencia created viewsStats");
-
- logger.info("LaReferencia creating downloadsStats");
- downloadsStats();
- logger.info("LaReferencia created downloadsStats");
-
- logger.info("LaReferencia creating COUNTER CoP R5 metrics");
- createCoPR5TablesForLareferencia();
- logger.info("LaReferencia created COUNTER CoP R5 metrics");
-
-// logger.info("LaReferencia updating Production Tables");
-// updateProdTables();
-// logger.info("LaReferencia updated Production Tables");
-
- } catch (Exception e) {
- logger.error("Failed to process logs: " + e);
- throw new Exception("Failed to process logs: " + e.toString(), e);
- }
- }
-
- public void viewsStats() throws Exception {
-
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Creating la_result_views_monthly_tmp view");
- String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".la_result_views_monthly_tmp AS "
- +
- "SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
- "THEN 1 ELSE 0 END) AS openaire_referrer, " +
- "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
- "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog where action='action' and " +
- "(source_item_type='oaItem' or source_item_type='repItem') " +
- "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
- "source ORDER BY source, entity_id";
- stmt.executeUpdate(sql);
- logger.info("Created la_result_views_monthly_tmp view");
-
- logger.info("Dropping la_views_stats_tmp table");
- sql = "DROP TABLE IF EXISTS " +
- ConnectDB.getUsageStatsDBSchema() +
- ".la_views_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Dropped la_views_stats_tmp table");
-
- logger.info("Creating la_views_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_views_stats_tmp " +
- "AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
- "max(views) AS count, max(openaire_referrer) AS openaire " +
- "FROM " + ConnectDB.getUsageStatsDBSchema() + ".la_result_views_monthly_tmp p, " +
- ConnectDB.getStatsDBSchema() + ".datasource_oids d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
- "WHERE p.source=d.oid AND p.id=ro.oid " +
- "GROUP BY d.id, ro.id, month " +
- "ORDER BY d.id, ro.id, month";
- stmt.executeUpdate(sql);
- logger.info("Created la_views_stats_tmp table");
-
- stmt.close();
- // ConnectDB.getHiveConnection().close();
- }
-
- private void downloadsStats() throws Exception {
-
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Creating la_result_downloads_monthly_tmp view");
- String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
- + ".la_result_downloads_monthly_tmp AS " +
- "SELECT entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' " +
- "THEN 1 ELSE 0 END) AS openaire_referrer, " +
- "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source " +
- "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog where action='download' and " +
- "(source_item_type='oaItem' or source_item_type='repItem') " +
- "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), " +
- "source ORDER BY source, entity_id";
- stmt.executeUpdate(sql);
- logger.info("Created la_result_downloads_monthly_tmp view");
-
- logger.info("Dropping la_downloads_stats_tmp table");
- sql = "DROP TABLE IF EXISTS " +
- ConnectDB.getUsageStatsDBSchema() +
- ".la_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Dropped la_downloads_stats_tmp table");
-
- logger.info("Creating la_downloads_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".la_downloads_stats_tmp " +
- "AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date, " +
- "max(downloads) AS count, max(openaire_referrer) AS openaire " +
- "FROM " + ConnectDB.getUsageStatsDBSchema() + ".la_result_downloads_monthly_tmp p, " +
- ConnectDB.getStatsDBSchema() + ".datasource_oids d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro " +
- "WHERE p.source=d.oid AND p.id=ro.oid " +
- "GROUP BY d.id, ro.id, month " +
- "ORDER BY d.id, ro.id, month";
- stmt.executeUpdate(sql);
- logger.info("Created la_downloads_stats_tmp table");
-
- stmt.close();
- // ConnectDB.getHiveConnection().close();
- }
-
- private void createCoPR5TablesForLareferencia() throws Exception {
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- // Unique Item Investigations
-
- logger.info("Create View Unique_Item_Investigations");
- String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
- + ".lr_view_unique_item_investigations "
- + "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
- + "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View Unique_Item_Investigations");
-
- logger.info("Drop Table Unique_Item_Investigations");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_unique_item_investigations ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table Unique_Item_Investigations");
-
- logger.info("Create Table tbl_unique_item_investigations");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_unique_item_investigations as "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(unique_item_investigations) AS unique_item_investigations, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_view_unique_item_investigations p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table tbl_unique_item_investigations");
-
- // Total Item Investigations
-
- logger.info("Create View lr_view_total_item_investigations");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".lr_view_total_item_investigations "
- + "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) AS total_item_investigations, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
- + "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View lr_view_total_item_investigations");
-
- logger.info("Drop Table lr_tbl_total_item_investigations");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_total_item_investigations ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table lr_tbl_total_item_investigations");
-
- logger.info("Create Table lr_tbl_total_item_investigations");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_total_item_investigations as "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(total_item_investigations) AS total_item_investigations, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_view_total_item_investigations p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table lr_tbl_total_item_investigations");
-
- // Unique Item Requests
-
- logger.info("Create View lr_view_unique_item_requests");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".lr_view_unique_item_requests AS "
- + "SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
- + "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View lr_view_unique_item_requests");
-
- logger.info("Drop Table Unique_Item_Requests");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_unique_item_requests ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table Unique_Item_Requests");
-
- logger.info("Create Table lr_tbl_unique_item_requests");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_unique_item_requests as "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(unique_item_requests) AS unique_item_requests, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_view_unique_item_requests p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table lr_tbl_unique_item_requests");
-
- // Total Item Requests
-
- logger.info("Create View lr_view_total_item_requests");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".lr_view_total_item_requests "
- + "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) AS total_item_requests, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".lareferencialog "
- + "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View lr_view_total_item_requests");
-
- logger.info("Drop Table lr_tbl_total_item_requests");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_total_item_requests ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table lr_tbl_total_item_requests");
-
- logger.info("Create Table lr_tbl_total_item_requests");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_total_item_requests as "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(total_item_requests) AS total_item_requests, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".view_total_item_requests p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table lr_tbl_total_item_requests");
-
- // All CoP R5 metrics Table
- logger.info("Drop Table lr_tbl_all_r5_metrics");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_all_r5_metrics ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table lr_tbl_all_r5_metrics");
-
- logger.info("Create Table lr_tbl_all_r5_metrics");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_all_r5_metrics as "
- + "WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id, "
- + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
- + "coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, "
- + "coalesce(ds.total_item_investigations, 0) as total_item_investigations "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_unique_item_investigations AS vs "
- + "FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_total_item_investigations AS ds "
- + " ON ds.source=vs.source AND ds.result_id=vs.result_id AND ds.date=vs.date), "
- + "tmp2 AS (select coalesce (ds.repository_id, vs.repository_id) as repository_id, "
- + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
- + "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
- + "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
- + " coalesce(vs.unique_item_requests, 0) as unique_item_requests FROM tmp1 "
- + "AS ds FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_unique_item_requests AS vs "
- + "ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date) "
- + "SELECT 'LaReferencia' as source, coalesce (ds.repository_id, vs.repository_id) as repository_id, "
- + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
- + "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
- + "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
- + "coalesce(ds.unique_item_requests, 0) as unique_item_requests, "
- + "coalesce(vs.total_item_requests, 0) as total_item_requests "
- + "FROM tmp2 AS ds FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_total_item_requests "
- + "AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
- stmt.executeUpdate(sql);
- logger.info("Created Table tbl_all_r5_metrics");
-
- stmt.close();
- ConnectDB.getHiveConnection().close();
-
- }
-
-}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/PiwikStatsDB.java
deleted file mode 100755
index d20f37363..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/PiwikStatsDB.java
+++ /dev/null
@@ -1,1112 +0,0 @@
-
-package eu.dnetlib.oa.graph.usagestatsbuild.export;
-
-import java.sql.*;
-import java.text.SimpleDateFormat;
-import java.util.*;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * @author D. Pierrakos, S. Zoupanos
- */
-public class PiwikStatsDB {
-
- private String logPath;
-
- private Statement stmt = null;
-
- private static final Logger logger = LoggerFactory.getLogger(PiwikStatsDB.class);
-
- public PiwikStatsDB() throws Exception {
-
- }
-
- public void recreateDBAndTables() throws Exception {
- this.createDatabase();
- // The piwiklog table is not needed since it is built
- // on top of JSON files
- //////////// this.createTmpTables();
- }
-
- private void createDatabase() throws Exception {
-
- try {
-
- stmt = ConnectDB.getHiveConnection().createStatement();
-
- logger.info("Dropping usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
- String dropDatabase = "DROP DATABASE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + " CASCADE";
- stmt.executeUpdate(dropDatabase);
- } catch (Exception e) {
- logger.error("Failed to drop database: " + e);
- throw new Exception("Failed to drop database: " + e.toString(), e);
- }
-
- try {
-
- logger.info("Creating usagestats DB: " + ConnectDB.getUsageStatsDBSchema());
- String createDatabase = "CREATE DATABASE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema();
- stmt.executeUpdate(createDatabase);
- logger.info("Usagestats DB created: " + ConnectDB.getUsageStatsDBSchema());
-
- } catch (Exception e) {
- logger.error("Failed to create database: " + e);
- throw new Exception("Failed to create database: " + e.toString(), e);
- }
-
- try {
- stmt = ConnectDB.getHiveConnection().createStatement();
-
- logger.info("Creating permanent usagestats DB: " + ConnectDB.getUsagestatsPermanentDBSchema());
- String createPermanentDatabase = "CREATE DATABASE IF NOT EXISTS "
- + ConnectDB.getUsagestatsPermanentDBSchema();
- stmt.executeUpdate(createPermanentDatabase);
- logger.info("Created permanent usagestats DB: " + ConnectDB.getUsagestatsPermanentDBSchema());
-
- } catch (Exception e) {
- logger.error("Failed to create database: " + e);
- throw new Exception("Failed to create database: " + e.toString(), e);
- }
- }
-
- public void createDistinctPiwikLog() throws Exception {
- logger.info("Initialising DB properties");
- ConnectDB.init();
-
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Dropping piwiklogdistinct");
- String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct";
- stmt.executeUpdate(sql);
- logger.info("Dropped piwiklogdistinct");
-
- logger.info("Creating piwiklogdistinct table");
- // Create Piwiklogdistinct table - This table should exist
- String sqlCreateTablePiwikLogDistinct = "CREATE TABLE IF NOT EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".piwiklogdistinct(source INT, id_visit STRING, country STRING, action STRING, url STRING, "
- + "entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) "
- + "clustered by (source, id_visit, action, timestamp, entity_id) "
- + "into 100 buckets stored as orc tblproperties('transactional'='true')";
- stmt.executeUpdate(sqlCreateTablePiwikLogDistinct);
- logger.info("Created piwiklogdistinct table");
-
- logger.info("Inserting data to piwiklogdistinct");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "SELECT DISTINCT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".piwiklog WHERE entity_id is not null";
- stmt.executeUpdate(sql);
- logger.info("Inserted data to piwiklogdistinct");
- }
-
- public void processLogs() throws Exception {
- try {
-
- logger.info("ViewsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
- viewsStats();
- logger.info("ViewsStats processing ends at: " + new Timestamp(System.currentTimeMillis()));
-
- logger.info("DownloadsStats processing starts at: " + new Timestamp(System.currentTimeMillis()));
- downloadsStats();
- logger.info("DownloadsStats processing ends at: " + new Timestamp(System.currentTimeMillis()));
-
- logger.info("COUNTER CoP R5 metrics processing starts at: " + new Timestamp(System.currentTimeMillis()));
- createCoPR5Tables();
- logger.info("COUNTER CoP R5 metrics processing ends at: " + new Timestamp(System.currentTimeMillis()));
-
- } catch (Exception e) {
- logger.error("Failed to process logs: " + e);
- throw new Exception("Failed to process logs: " + e.toString(), e);
- }
- }
-
- public void processEpisciencesLogs() throws Exception {
- try {
-
- logger.info("Views Episciences processing starts at: " + new Timestamp(System.currentTimeMillis()));
- episciencesViewsStats();
- logger.info("Views Episciences processing ends at: " + new Timestamp(System.currentTimeMillis()));
-
- logger.info("downloads Episciences processing starts at: " + new Timestamp(System.currentTimeMillis()));
- episciencesDownloadsStats();
- logger.info("Downloads Episciences processing ends at: " + new Timestamp(System.currentTimeMillis()));
-
- } catch (Exception e) {
- logger.error("Failed to process logs: " + e);
- throw new Exception("Failed to process logs: " + e.toString(), e);
- }
- }
-
- public void viewsStats() throws Exception {
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Dropping openaire_result_views_monthly_tmp view");
- String drop_result_views_monthly = "DROP VIEW IF EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_piwikresult_views_monthly_tmp";
- stmt.executeUpdate(drop_result_views_monthly);
- logger.info("Dropped openaire_result_views_monthly_tmp view");
-
- logger.info("Creating openaire_result_views_monthly_tmp view");
- String create_result_views_monthly = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_result_views_monthly_tmp "
- + "AS SELECT entity_id, "
- + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
- + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
- + "AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema()
- + ".piwiklogdistinct where action='action' and (source_item_type='oaItem' or "
- + "source_item_type='repItem') "
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
- + "source ORDER BY source, entity_id";
- stmt.executeUpdate(create_result_views_monthly);
- logger.info("Created openaire_result_views_monthly_tmp table");
-
- logger.info("Dropping openaire_views_stats_tmp table");
- String drop_views_stats = "DROP TABLE IF EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_views_stats_tmp";
- stmt.executeUpdate(drop_views_stats);
- logger.info("Dropped openaire_views_stats_tmp table");
-
- logger.info("Creating openaire_views_stats_tmp table");
- String create_views_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_views_stats_tmp "
- + "AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "max(views) AS count, max(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
- + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month "
- + "ORDER BY d.id, ro.id, month ";
- stmt.executeUpdate(create_views_stats);
- logger.info("Created openaire_views_stats_tmp table");
-
- logger.info("Creating openaire_pageviews_stats_tmp table");
- String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_pageviews_stats_tmp AS SELECT "
- + "'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_views_monthly_tmp p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=" + ExecuteWorkflow.portalMatomoID
- + " AND p.source=d.piwik_id and p.id=ro.id AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
- + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month "
- + "ORDER BY d.id, ro.id, month ";
- stmt.executeUpdate(create_pageviews_stats);
- logger.info("Created pageviews_stats table");
-
- stmt.close();
- // ConnectDB.getHiveConnection().close();
- }
-
- private void downloadsStats() throws Exception {
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Dropping openaire_result_downloads_monthly_tmp view");
- String drop_result_downloads_monthly = "DROP VIEW IF EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_result_downloads_monthly_tmp";
- stmt.executeUpdate(drop_result_downloads_monthly);
- logger.info("Dropped openaire_result_downloads_monthly_tmp view");
-
- logger.info("Creating openaire_result_downloads_monthly_tmp view");
- String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_result_downloads_monthly_tmp "
- + "AS SELECT entity_id, "
- + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
- + "COUNT(entity_id) as downloads, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct where action='download' "
- + "AND (source_item_type='oaItem' OR source_item_type='repItem') "
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source "
- + "ORDER BY source, entity_id, month";
- stmt.executeUpdate(sql);
- logger.info("Created openaire_result_downloads_monthly_tmp view");
-
- logger.info("Dropping openaire_downloads_stats_tmp table");
- String drop_views_stats = "DROP TABLE IF EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".openaire_downloads_stats_tmp";
- stmt.executeUpdate(drop_views_stats);
- logger.info("Dropped openaire_downloads_stats_tmp table");
-
- logger.info("Creating openaire_downloads_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_downloads_stats_tmp AS "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "max(downloads) AS count, max(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id and p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503' "
- + "AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month "
- + "ORDER BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created downloads_stats table");
-
- logger.info("Dropping openaire_result_downloads_monthly_tmp view");
- sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".openaire_result_downloads_monthly_tmp";
- logger.info("Dropped openaire_result_downloads_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- stmt.close();
- // ConnectDB.getHiveConnection().close();
- }
-
- public void uploadOldPedocs() throws Exception {
- stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- // Dropping Pedocs pedocs_views_stats_tmp table
- logger.info("Dropping Pedocs pedocs_views_stats_tmp table");
- String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_views_stats_tmp";
- logger.info("Dropped pedocs_views_stats_tmp table ");
- stmt.executeUpdate(sql);
-
- // Dropping Pedocs pedocs_downloads_stats table
- logger.info("Dropping pedocs_downloads_stats table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats";
- logger.info("Dropped pedocs_downloads_stats table ");
- stmt.executeUpdate(sql);
-
- // Creating Pedocs pedocs_views_stats_tmp table
- logger.info("Creating Pedocs pedocs_views_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_views_stats_tmp AS "
- + "SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,"
- + "r.id as result_id,date,counter_abstract as count, 0 as openaire "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".pedocsoldviews p, " + ConnectDB.getStatsDBSchema()
- + ".result_oids r where r.oid=p.identifier";
- stmt.executeUpdate(sql);
- logger.info("Created pedocs_views_stats_tmp table ");
-
- // Creating Pedocs pedocs_downloads_stats_tmp table
- logger.info("Creating Pedocs pedocs_downloads_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats_tmp AS "
- + "SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,"
- + "r.id as result_id, date, counter as count, 0 as openaire "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".pedocsolddownloads p, " + ConnectDB.getStatsDBSchema()
- + ".result_oids r where r.oid=p.identifier";
- stmt.executeUpdate(sql);
- logger.info("Created pedocs_downloads_stats_tmp table ");
-
- }
-
- public void uploadTUDELFTStats() throws Exception {
- stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- // Dropping TUDELFT tudelft_result_views_monthly_tmp view
- logger.info("Dropping TUDELFT tudelft_result_views_monthly_tmp view");
- String sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp";
- logger.info("Dropped tudelft_result_views_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- // Dropping TUDELFT tudelft_result_views_monthly_tmp view
- logger.info("Dropping TUDELFT tudelft_result_downloads_monthly_tmp view");
- sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp";
- logger.info("Dropped tudelft_result_downloads_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- // Dropping TUDELFT tudelft_views_stats_tmp table
- logger.info("Dropping TUDELFT tudelft_views_stats_tmp table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_views_stats_tmp";
- logger.info("Dropped tudelft_views_stats_tmp table ");
- stmt.executeUpdate(sql);
-
- // Dropping TUDELFT tudelft_downloads_stats_tmp table
- logger.info("Dropping TUDELFT tudelft_downloads_stats_tmp table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_downloads_stats_tmp";
- logger.info("Dropped tudelft_downloads_stats_tmp table ");
- stmt.executeUpdate(sql);
-
- // Creating TUDELFT tudelft_result_views_monthly_tmp view
- logger.info("Creating TUDELFT tudelft_result_views_monthly_tmp view");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp "
- + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=252 "
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
- stmt.executeUpdate(sql);
- logger.info("Created tudelft_result_views_monthly_tmp view ");
-
- // Creating TUDELFT tudelft_views_stats_tmp table
- logger.info("Creating TUDELFT tudelft_views_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_views_stats_tmp AS "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
- + ".tudelft_result_views_monthly_tmp p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201' "
- + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
- stmt.executeUpdate(sql);
- logger.info("Created TUDELFT tudelft_views_stats_tmp table");
-
- // Creating TUDELFT tudelft_result_downloads_monthly_tmp view
- logger.info("Creating TUDELFT tudelft_result_downloads_monthly_tmp view");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp "
- + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=252 "
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
- stmt.executeUpdate(sql);
- logger.info("Created tudelft_result_downloads_monthly_tmp view ");
-
- // Creating TUDELFT tudelft_downloads_stats_tmp table
- logger.info("Creating TUDELFT tudelft_downloads_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_downloads_stats_tmp AS "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
- + ".tudelft_result_downloads_monthly_tmp p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201' "
- + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
- stmt.executeUpdate(sql);
- logger.info("Created TUDELFT tudelft_downloads_stats_tmp table");
-
- // Dropping TUDELFT tudelft_result_views_monthly_tmp view
- logger.info("Dropping TUDELFT tudelft_result_views_monthly_tmp view");
- sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_views_monthly_tmp";
- logger.info("Dropped tudelft_result_views_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- // Dropping TUDELFT tudelft_result_views_monthly_tmp view
- logger.info("Dropping TUDELFT tudelft_result_downloads_monthly_tmp view");
- sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_result_downloads_monthly_tmp";
- logger.info("Dropped tudelft_result_downloads_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- }
-
- public void uploadB2SHAREStats() throws Exception {
- stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- // Dropping B2SHARE b2share_result_views_monthly_tmp view
- logger.info("Dropping B2SHARE b2share_result_views_monthly_tmp view");
- String sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp";
- logger.info("Dropped b2share_result_views_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- // Dropping B2SHARE b2share_result_views_monthly_tmp view
- logger.info("Dropping b2SHARE b2share_result_downloads_monthly_tmp view");
- sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp";
- logger.info("Dropped b2share_result_downloads_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- // Dropping B2SHARE b2share_views_stats_tmp table
- logger.info("Dropping B2SHARE b2share_views_stats_tmp table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_views_stats_tmp";
- logger.info("Dropped b2share_views_stats_tmp table ");
- stmt.executeUpdate(sql);
-
- // Dropping B2SHARE b2share_downloads_stats_tmp table
- logger.info("Dropping B2SHARE b2share_downloads_stats_tmp table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp";
- logger.info("Dropped b2share_downloads_stats_tmp table ");
- stmt.executeUpdate(sql);
-
- // Creating B2SHARE b2share_result_views_monthly_tmp view
- logger.info("Creating B2SHARE b2share_result_views_monthly_tmp view");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp "
- + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=412 "
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
- stmt.executeUpdate(sql);
- logger.info("Created b2share_result_views_monthly_tmp view ");
-
- // Creating B2SHARE b2share_views_stats_tmp table
- logger.info("Creating B2SHARE b2share_views_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_views_stats_tmp AS "
- + "SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
- + ".b2share_result_views_monthly_tmp p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877' "
- + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
- stmt.executeUpdate(sql);
- logger.info("Created B2SHARE b2share_views_stats_tmp table");
-
- // Creating B2SHARE b2share_result_downloads_monthly_tmp view
- logger.info("Creating B2SHARE b2share_result_downloads_monthly_tmp view");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp "
- + "AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=412 "
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ORDER BY source, entity_id";
- stmt.executeUpdate(sql);
- logger.info("Created b2share_result_downloads_monthly_tmp view ");
-
- // Creating B2SHARE b2share_downloads_stats_tmp table
- logger.info("Creating B2SHARE b2share_downloads_stats_tmp table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp AS "
- + "SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "max(views) AS count, max(openaire_referrer) AS openaire FROM " + ConnectDB.getUsageStatsDBSchema()
- + ".b2share_result_downloads_monthly_tmp p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d, " + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877' "
- + "GROUP BY d.id, ro.id, month ORDER BY d.id, ro.id";
- stmt.executeUpdate(sql);
- logger.info("Created B2SHARE b2share_downloads_stats_tmp table");
-
- // Dropping B2SHARE b2share_result_views_monthly_tmp view
- logger.info("Dropping B2SHARE b2share_result_views_monthly_tmp view");
- sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_views_monthly_tmp";
- logger.info("Dropped b2share_result_views_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- // Dropping B2SHARE b2share_result_views_monthly_tmp view
- logger.info("Dropping B2SHARE b2share_result_downloads_monthly_tmp view");
- sql = "DROP view IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".b2share_result_downloads_monthly_tmp";
- logger.info("Dropped b2share_result_downloads_monthly_tmp view ");
- stmt.executeUpdate(sql);
-
- }
-
- public void episciencesViewsStats() throws Exception {
- logger.info("Creating episciences Views");
-
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Dropping Episcience Views Table");
- String dropEpisciencesViewsTable = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".episciencesviews ";
- stmt.executeUpdate(dropEpisciencesViewsTable);
- logger.info("Dropped Episcience Views Table");
-
- logger.info("Creating Episcience Views Table");
- String createEpisciencesViewsTable = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".episciencesviews (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
- + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
-
- stmt.executeUpdate(createEpisciencesViewsTable);
-
- String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
- + ConnectDB.getStatsDBSchema() +
- ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";
-
- PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
- .prepareStatement(returnEpisciencesJournals);
- ResultSet rs = st.executeQuery();
- while (rs.next()) {
- String journal_openaire_id = rs.getString(1);
- String episciencesSuffix = rs.getString(2);
-
- logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);
- logger.info("Dropping episciencesSuffix_result_views_monthly_tmp table");
- String dropepisciencesSuffixView = "DROP VIEW " + ConnectDB.getUsageStatsDBSchema()
- + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp";
- // Statement stmtRS = ConnectDB.getHiveConnection().createStatement();
- stmt.executeUpdate(dropepisciencesSuffixView);
- logger.info("Dropped episciencesSuffix_result_views_monthly_tmp table");
-
- logger.info("Creating episciencesSuffix_result_views_monthly_tmp table");
-
- String create_result_views_monthly = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
- + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp "
- + "AS SELECT entity_id, "
- + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
- + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
- + "AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema()
- + ".episcienceslog where action='action' and (source_item_type='oaItem' or "
- + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
- + "source ORDER BY source, entity_id";
-
- stmt.executeUpdate(create_result_views_monthly);
- logger.info("Created episciencesSuffix_result_views_monthly_tmp table");
-
- logger.info("Inserting episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
- String insertIntoEpisciencesViewsTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
- + ".episciencesviews SELECT 'Episciences' as source, '"
- + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
- + " max(views) AS count, max(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema()
- + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp p,"
- + ConnectDB.getStatsDBSchema()
- + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
-
- stmt.executeUpdate(insertIntoEpisciencesViewsTable);
- logger.info("Inserted episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
-
- stmt.executeUpdate(dropepisciencesSuffixView);
- logger.info("Dropped episciencesSuffix_result_views_monthly_tmp view");
- }
- rs.close();
-
- logger.info("Episciences Views Created");
- }
-
- public void episciencesDownloadsStats() throws Exception {
- logger.info("Creating episciences Downloads");
-
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Dropping Episcience Downloads Table");
- String dropEpisciencesDownloadsTable = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".episciencesvdownloads ";
- stmt.executeUpdate(dropEpisciencesDownloadsTable);
- logger.info("Dropped Episcience Downloads Table");
-
- logger.info("Creating Episcience Downloads Table");
- String createEpisciencesDownloadsTable = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".episciencesdownloads (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
- + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
-
- stmt.executeUpdate(createEpisciencesDownloadsTable);
-
- String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
- + ConnectDB.getStatsDBSchema() +
- ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true)";
-
- PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
- .prepareStatement(returnEpisciencesJournals);
- ResultSet rs = st.executeQuery();
- while (rs.next()) {
- String journal_openaire_id = rs.getString(1);
- String episciencesSuffix = rs.getString(2);
-
- logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);
- logger.info("Dropping episciencesSuffix_result_downloads_monthly_tmp table");
- String dropepisciencesSuffixDownloads = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp";
- stmt.executeUpdate(dropepisciencesSuffixDownloads);
-
- logger.info("Creating episciencesSuffix_result_downloads_monthly_tmp table");
-
- String create_result_downloads_monthly = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
- + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp "
- + "AS SELECT entity_id, "
- + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
- + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
- + "AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema()
- + ".episcienceslog where action='download' and (source_item_type='oaItem' or "
- + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%'"
- + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
- + "source ORDER BY source, entity_id";
-
- stmt.executeUpdate(create_result_downloads_monthly);
- logger.info("Created episciencesSuffix_result_downloads_monthly_tmp table");
-
- logger.info("Inserting episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
- String insertIntoEpisciencesDownloadsTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
- + ".episciencesdownloads SELECT 'Episciences' as source, '"
- + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
- + " max(views) AS count, max(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema()
- + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp p,"
- + ConnectDB.getStatsDBSchema()
- + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
-
- stmt.executeUpdate(insertIntoEpisciencesDownloadsTable);
- logger.info("Inserted episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
-
- stmt.executeUpdate(dropepisciencesSuffixDownloads);
- logger.info("Dropped episciencesSuffix_result_downloads_monthly_tmp view");
-
- }
- rs.close();
- }
-
- private void createCoPR5Tables() throws Exception {
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- // Unique Item Investigations
-//REMOVE sessionid from total
- logger.info("Create View Unique_Item_Investigations");
- String sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".view_unique_item_investigations "
- + "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View Unique_Item_Investigations");
-
- logger.info("Drop Table Unique_Item_Investigations");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tbl_unique_item_investigations ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table Unique_Item_Investigations");
-
- logger.info("Create Table tbl_unique_item_investigations");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".tbl_unique_item_investigations as "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(unique_item_investigations) AS unique_item_investigations, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".view_unique_item_investigations p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table tbl_unique_item_investigations");
-
- // Total Item Investigations
-
- logger.info("Create View view_total_item_investigations");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".view_total_item_investigations "
- + "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) AS total_item_investigations, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View view_total_item_investigations");
-
- logger.info("Drop Table tbl_total_item_investigations");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_investigations ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table tbl_total_item_investigations");
-
- logger.info("Create Table tbl_total_item_investigations");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_investigations AS "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(total_item_investigations) AS total_item_investigations, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".view_total_item_investigations p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table tbl_total_item_investigations");
-
- // Unique Item Requests
-
- logger.info("Create View view_unique_item_requests");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".view_unique_item_requests AS "
- + "SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View view_unique_item_requests");
-
- logger.info("Drop Table Unique_Item_Requests");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tbl_unique_item_requests ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table Unique_Item_Requests");
-
- logger.info("Create Table tbl_unique_item_requests");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".tbl_unique_item_requests as "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(unique_item_requests) AS unique_item_requests, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".view_unique_item_requests p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table tbl_unique_item_requests");
-
- // Total Item Requests
-
- logger.info("Create View view_total_item_requests");
- sql = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema() + ".view_total_item_requests "
- + "AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id, "
- + "COUNT(entity_id) AS total_item_requests, "
- + "SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".piwiklogdistinct "
- + "WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem') "
- + "AND entity_id is NOT NULL GROUP BY id_visit, entity_id, "
- + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source ";
- stmt.executeUpdate(sql);
- logger.info("Created View view_total_item_requests");
-
- logger.info("Drop Table tbl_total_item_requests");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_requests ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table tbl_total_item_requests");
-
- logger.info("Create Table tbl_total_item_requests");
- sql = "CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_requests as "
- + "SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, "
- + "sum(total_item_requests) AS total_item_requests, sum(openaire_referrer) AS openaire "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".view_total_item_requests p, "
- + ConnectDB.getStatsDBSchema() + ".datasource d," + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404' "
- + "AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' "
- + "GROUP BY d.id, ro.id, month ";
- stmt.executeUpdate(sql);
- logger.info("Created Table tbl_total_item_requests");
-
- // All CoP R5 metrics Table
- logger.info("Drop Table tbl_all_r5_metrics");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics ";
- stmt.executeUpdate(sql);
- logger.info("Dropped Table tbl_all_r5_metrics");
-
- logger.info("Create Table tbl_all_r5_metrics");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".tbl_all_r5_metrics as "
- + "WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id, "
- + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
- + "coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, "
- + "coalesce(ds.total_item_investigations, 0) as total_item_investigations "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".tbl_unique_item_investigations AS vs "
- + "FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_investigations AS ds "
- + " ON ds.source=vs.source AND ds.result_id=vs.result_id AND ds.date=vs.date), "
- + "tmp2 AS (select coalesce (ds.repository_id, vs.repository_id) as repository_id, "
- + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
- + "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
- + "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
- + " coalesce(vs.unique_item_requests, 0) as unique_item_requests FROM tmp1 "
- + "AS ds FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".tbl_unique_item_requests AS vs "
- + "ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date) "
- + "SELECT 'OpenAIRE' as source, coalesce (ds.repository_id, vs.repository_id) as repository_id, "
- + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
- + "coalesce(ds.unique_item_investigations, 0) as unique_item_investigations, "
- + "coalesce(ds.total_item_investigations, 0) as total_item_investigations, "
- + "coalesce(ds.unique_item_requests, 0) as unique_item_requests, "
- + "coalesce(vs.total_item_requests, 0) as total_item_requests "
- + "FROM tmp2 AS ds FULL OUTER JOIN " + ConnectDB.getUsageStatsDBSchema() + ".tbl_total_item_requests "
- + "AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
- stmt.executeUpdate(sql);
- logger.info("Created Table tbl_all_r5_metrics");
- stmt.close();
- ConnectDB.getHiveConnection().close();
-
- }
-
- public void finalizeStats() throws Exception {
- stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- // Dropping views_stats table
- logger.info("Dropping views_stats table");
- String sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".views_stats";
- logger.info("Dropped views_stats table ");
- stmt.executeUpdate(sql);
-
- // Dropping downloads_stats table
- logger.info("Dropping downloads_stats table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats";
- logger.info("Dropped downloads_stats table ");
- stmt.executeUpdate(sql);
-
- // Dropping page_views_stats table
- logger.info("Dropping pageviews_stats table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats";
- logger.info("Dropped pageviews_stats table ");
- stmt.executeUpdate(sql);
-
- // Dropping usage_stats table
- logger.info("Dropping usage_stats table");
- sql = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats";
- logger.info("Dropped usage_stats table ");
- stmt.executeUpdate(sql);
-
- // Creating views_stats table
- logger.info("Creating views_stats table");
- String createViewsStats = "CREATE TABLE IF NOT EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".views_stats "
- + "LIKE " + ConnectDB.getUsageStatsDBSchema() + ".openaire_views_stats_tmp STORED AS PARQUET";
- stmt.executeUpdate(createViewsStats);
- logger.info("Created views_stats table");
-
- // Inserting OpenAIRE views stats
- logger.info("Inserting Openaire data to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_views_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Openaire views updated to views_stats");
-
- // Inserting Episciences views stats
- logger.info("Inserting Episciences data to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".episciencesviews";
- stmt.executeUpdate(sql);
- logger.info("Episciences views updated to views_stats");
-
- // Inserting Pedocs old views stats
- logger.info("Inserting Pedocs old data to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_views_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Pedocs views updated to views_stats");
-
- // Inserting TUDELFT views stats
- logger.info("Inserting TUDELFT data to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_views_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("TUDELFT views updated to views_stats");
-
- // Inserting Lareferencia views stats
- logger.info("Inserting LaReferencia data to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".la_views_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("LaReferencia views updated to views_stats");
-
- // Inserting B2SHARE views stats
- logger.info("Inserting B2SHARE data to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".b2share_views_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("B2SHARE views updated to views_stats");
-
- logger.info("Creating downloads_stats table");
- String createDownloadsStats = "CREATE TABLE IF NOT EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".downloads_stats "
- + "LIKE " + ConnectDB.getUsageStatsDBSchema() + ".openaire_downloads_stats_tmp STORED AS PARQUET";
- stmt.executeUpdate(createDownloadsStats);
- logger.info("Created downloads_stats table");
-
- // Inserting OpenAIRE downloads stats
- logger.info("Inserting OpenAIRE data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Inserted OpenAIRE data to downloads_stats");
-
- // Inserting Episciences views stats
- logger.info("Inserting Episciences data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".episciencesdownloads";
- stmt.executeUpdate(sql);
- logger.info("Episciences downloads updated to downloads_stats");
-
- // Inserting Pedocs old downloads stats
- logger.info("Inserting PeDocs old data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pedocs_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Inserted Pedocs data to downloads_stats");
-
- // Inserting TUDELFT downloads stats
- logger.info("Inserting TUDELFT data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".tudelft_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Inserted TUDELFT data to downloads_stats");
-
- // Inserting B2SHARE downloads stats
- logger.info("Inserting B2SHARE data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".b2share_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Inserted B2SHARE data to downloads_stats");
- // Inserting Lareferencia downloads stats
- logger.info("Inserting LaReferencia data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".la_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("Lareferencia downloads updated to downloads_stats");
-
- // Inserting IRUS downloads stats
- logger.info("Inserting IRUS data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".irus_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("IRUS downloads updated to downloads_stats");
-
- // Inserting IRUS_R5 downloads stats
- logger.info("Inserting IRUS_R5 views to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT source, repository_id, result_id, `date`, views, openaire FROM "
- + ConnectDB.getUsageStatsDBSchema()
- + ".irus_R5_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("IRUS_R5 views updated to views_stats");
-
- // Inserting IRUS_R5 downloads stats
- logger.info("Inserting IRUS_R5 data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT source, repository_id, result_id, `date`, downloads, openaire FROM "
- + ConnectDB.getUsageStatsDBSchema()
- + ".irus_R5_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("IRUS_R5 downloads updated to downloads_stats");
-
- // Inserting SARC-OJS downloads stats
- logger.info("Inserting SARC data to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".sarc_downloads_stats_tmp";
- stmt.executeUpdate(sql);
- logger.info("SARC-OJS downloads updated to downloads_stats");
-
- // Inserting Datacite views stats
- logger.info("Inserting Datacite views to views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".views_stats "
- + "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_views";
- stmt.executeUpdate(sql);
- logger.info("Datacite views updated to views_stats");
-
- // Inserting Datacite downloads stats
- logger.info("Inserting Datacite downloads to downloads_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats "
- + "SELECT * FROM " + ConnectDB.getUsageRawDataDBSchema() + ".datacite_downloads";
- stmt.executeUpdate(sql);
- logger.info("Datacite downloads updated to downloads_stats");
-
- logger.info("Creating pageviews_stats table");
- String create_pageviews_stats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".pageviews_stats "
- + "LIKE " + ConnectDB.getUsageStatsDBSchema() + ".openaire_pageviews_stats_tmp STORED AS PARQUET";
- stmt.executeUpdate(create_pageviews_stats);
- logger.info("Created pageviews_stats table");
-
- // Inserting OpenAIRE views stats from Portal
- logger.info("Inserting data to page_views_stats");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".openaire_pageviews_stats_tmp";
- stmt.executeUpdate(sql);
-
- logger.info("Dropping full_dates table");
- String dropFullDates = "DROP TABLE IF EXISTS "
- + ConnectDB.getUsageStatsDBSchema()
- + ".full_dates";
- stmt.executeUpdate(dropFullDates);
- logger.info("Dropped full_dates table");
-
- Calendar startCalendar = Calendar.getInstance();
- startCalendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse("2016-01-01"));
- Calendar endCalendar = Calendar.getInstance();
- int diffYear = endCalendar.get(Calendar.YEAR) - startCalendar.get(Calendar.YEAR);
- int diffMonth = diffYear * 12 + endCalendar.get(Calendar.MONTH) - startCalendar.get(Calendar.MONTH);
-
- logger.info("Creating full_dates table");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".full_dates AS "
- + "SELECT from_unixtime(unix_timestamp(cast(add_months(from_date,i) AS DATE)), 'yyyy/MM') AS txn_date "
- + "FROM (SELECT DATE '2016-01-01' AS from_date) p "
- + "LATERAL VIEW "
- + "posexplode(split(space(" + diffMonth + "),' ')) pe AS i,x";
- stmt.executeUpdate(sql);
- logger.info("Created full_dates table");
-
- logger.info("Inserting data to usage_stats");
- sql = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats AS "
- + "SELECT coalesce(ds.source, vs.source) as source, "
- + "coalesce(ds.repository_id, vs.repository_id) as repository_id, "
- + "coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date, "
- + "coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views, "
- + "coalesce(ds.openaire, 0) as openaire_downloads, "
- + "coalesce(vs.openaire, 0) as openaire_views "
- + "FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats AS ds FULL OUTER JOIN "
- + ConnectDB.getUsageStatsDBSchema() + ".views_stats AS vs ON ds.source=vs.source "
- + "AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date";
- stmt.executeUpdate(sql);
- logger.info("Inserted data to usage_stats");
-
- // Inserting LaReferencia CoP R5 Metrics
- logger.info("Inserting Lareferencia data to tbl_all_r5_metrics");
- sql = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics "
- + "SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".lr_tbl_all_r5_metrics";
- stmt.executeUpdate(sql);
-
- // Inserting IRUS-UK CoP R5 Metrics
- logger.info("Inserting IRUS-UK data into tbl_all_r5_metrics");
- String insertĪ”5Stats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics "
- + "SELECT s.source, d.id AS repository_id, "
- + "ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, "
- + "s.unique_item_investigations , s.total_item_investigations, "
- + "s.unique_item_requests, s.total_item_requests "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".sushilog_cop_r5 s, "
- + ConnectDB.getStatsDBSchema() + ".datasource_oids d, "
- + ConnectDB.getStatsDBSchema() + ".result_oids ro "
- + "WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK'";
- stmt.executeUpdate(insertĪ”5Stats);
- logger.info("Inserted IRUS-UK data into tbl_all_r5_metrics");
-
- logger.info("Building views at permanent DB starts at: " + new Timestamp(System.currentTimeMillis()));
-
- logger.info("Dropping view views_stats on permanent usagestats DB");
- sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".views_stats";
- stmt.executeUpdate(sql);
- logger.info("Dropped view views_stats on permanent usagestats DB");
-
- logger.info("Create view views_stats on permanent usagestats DB");
- sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".views_stats"
- + " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".views_stats";
- stmt.executeUpdate(sql);
- logger.info("Created view views_stats on permanent usagestats DB");
-
- logger.info("Dropping view pageviews_stats on permanent usagestats DB");
- sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".pageviews_stats";
- stmt.executeUpdate(sql);
- logger.info("Dropped view pageviews_stats on permanent usagestats DB");
-
- logger.info("Create view pageviews_stats on permanent usagestats DB");
- sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".pageviews_stats"
- + " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats";
- stmt.executeUpdate(sql);
- logger.info("Created view pageviews_stats on permanent usagestats DB");
-
- logger.info("Dropping view downloads_stats on permanent usagestats DB");
- sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".downloads_stats";
- stmt.executeUpdate(sql);
- logger.info("Dropped view on downloads_stats on permanent usagestats DB");
-
- logger.info("Create view on downloads_stats on permanent usagestats DB");
- sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".downloads_stats"
- + " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats";
- stmt.executeUpdate(sql);
- logger.info("Created view on downloads_stats on permanent usagestats DB");
-
- logger.info("Dropping view usage_stats on permanent usagestats DB");
- sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".usage_stats";
- stmt.executeUpdate(sql);
- logger.info("Dropped view on usage_stats on permanent usagestats DB");
-
- logger.info("Create view on usage_stats on permanent usagestats DB");
- sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".usage_stats"
- + " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats";
- stmt.executeUpdate(sql);
- logger.info("Created view on usage_stats on permanent usagestats DB");
-
- logger.info("Dropping view COUNTER_R5_Metrics on permanent usagestats DB");
- sql = "DROP VIEW IF EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema() + ".counter_r5_stats_with_metrics";
- stmt.executeUpdate(sql);
- logger.info("Dropped view COUNTER_R5_Metrics on permanent usagestats DB");
-
- logger.info("Create view on COUNTER_R5_Metrics on permanent usagestats DB");
- sql = "CREATE VIEW IF NOT EXISTS " + ConnectDB.getUsagestatsPermanentDBSchema()
- + ".counter_r5_stats_with_metrics"
- + " AS SELECT * FROM " + ConnectDB.getUsageStatsDBSchema() + ".tbl_all_r5_metrics";
- stmt.executeUpdate(sql);
- logger.info("Created view on COUNTER_R5_Metrics on permanent usagestats DB");
-
- logger.info("Building views at permanent DB ends at: " + new Timestamp(System.currentTimeMillis()));
-
- stmt.close();
- ConnectDB.getHiveConnection().close();
- }
-
- private Connection getConnection() throws SQLException {
- return ConnectDB.getHiveConnection();
- }
-}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/SarcStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/SarcStats.java
deleted file mode 100755
index 880233f00..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/SarcStats.java
+++ /dev/null
@@ -1,107 +0,0 @@
-
-package eu.dnetlib.oa.graph.usagestatsbuild.export;
-
-import java.io.*;
-// import java.io.BufferedReader;
-// import java.io.InputStreamReader;
-import java.net.URL;
-import java.net.URLConnection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.json.simple.JSONArray;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.json.simple.parser.ParseException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * @author D. Pierrakos, S. Zoupanos
- */
-public class SarcStats {
-
- private Statement stmtHive = null;
- private Statement stmtImpala = null;
-
- private static final Logger logger = LoggerFactory.getLogger(SarcStats.class);
-
- public SarcStats() throws Exception {
-// createTables();
- }
-
- private void createTables() throws Exception {
- try {
-
- stmtHive = ConnectDB.getHiveConnection().createStatement();
- String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
- stmtHive.executeUpdate(sqlCreateTableSushiLog);
-
- // String sqlCopyPublicSushiLog="INSERT INTO sushilog SELECT * FROM public.sushilog;";
- // stmt.executeUpdate(sqlCopyPublicSushiLog);
- String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
- + " ON INSERT TO sushilog "
- + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
- + "sushilog.rid, sushilog.date "
- + "FROM sushilog "
- + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
- stmtHive.executeUpdate(sqlcreateRuleSushiLog);
- String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
- stmtHive.executeUpdate(createSushiIndex);
-
- stmtHive.close();
- ConnectDB.getHiveConnection().close();
- logger.info("Sushi Tables Created");
- } catch (Exception e) {
- logger.error("Failed to create tables: " + e);
- throw new Exception("Failed to create tables: " + e.toString(), e);
- }
- }
-
- public void processSarc() throws Exception {
- Statement stmt = ConnectDB.getHiveConnection().createStatement();
- ConnectDB.getHiveConnection().setAutoCommit(false);
-
- logger.info("Creating sarc_downloads_stats_tmp table");
- String createDownloadsStats = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
- + ".sarc_downloads_stats_tmp "
- + "(`source` string, "
- + "`repository_id` string, "
- + "`result_id` string, "
- + "`date` string, "
- + "`count` bigint, "
- + "`openaire` bigint)";
- stmt.executeUpdate(createDownloadsStats);
- logger.info("Created sarc_downloads_stats_tmp table");
-
- logger.info("Inserting into sarc_downloads_stats_tmp");
- String insertSarcStats = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".sarc_downloads_stats_tmp "
- + "SELECT s.source, d.id AS repository_id, "
- + "ro.id as result_id, CONCAT(CAST(YEAR(`date`) AS STRING), '/', "
- + "LPAD(CAST(MONTH(`date`) AS STRING), 2, '0')) AS `date`, s.count, '0' "
- + "FROM " + ConnectDB.getUsageRawDataDBSchema() + ".sushilog s, "
- + ConnectDB.getStatsDBSchema() + ".datasource_oids d, "
- + ConnectDB.getStatsDBSchema() + ".result_pids ro "
- + "WHERE d.oid LIKE CONCAT('%', s.repository, '%') AND d.id like CONCAT('%', 'sarcservicod', '%') "
- + "AND s.rid=ro.pid AND ro.type='Digital Object Identifier' AND s.metric_type='ft_total' AND s.source='SARC-OJS'";
- stmt.executeUpdate(insertSarcStats);
- logger.info("Inserted into sarc_downloads_stats_tmp");
-
- stmt.close();
- // ConnectDB.getHiveConnection().close();
- }
-
-}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/UsageStatsExporter.java
deleted file mode 100755
index 886ebca23..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/UsageStatsExporter.java
+++ /dev/null
@@ -1,137 +0,0 @@
-
-package eu.dnetlib.oa.graph.usagestatsbuild.export;
-
-import java.io.IOException;
-import java.sql.SQLException;
-import java.sql.Statement;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Main class for downloading and processing Usage statistics
- *
- * @author D. Pierrakos, S. Zoupanos
- */
-public class UsageStatsExporter {
-
- public UsageStatsExporter() {
-
- }
-
- private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
-
- public void export() throws Exception {
-
- logger.info("Initialising DB properties");
- ConnectDB.init();
-
-// runImpalaQuery();
- PiwikStatsDB piwikstatsdb = new PiwikStatsDB();
- logger.info("Re-creating database and tables");
- if (ExecuteWorkflow.recreateDbAndTables) {
- piwikstatsdb.recreateDBAndTables();
- logger.info("DB-Tables are created ");
- }
-// else {
-// piwikstatsdb.createTmpTables();
-// logger.info("TmpTables are created ");
-// }
- if (ExecuteWorkflow.processPiwikLogs) {
- logger.info("Creating distinct piwik log");
- piwikstatsdb.createDistinctPiwikLog();
- logger.info("Processing OpenAIRE logs");
- piwikstatsdb.processLogs();
- logger.info("OpenAIRE logs Done");
- logger.info("Processing Episciences logs");
- piwikstatsdb.processEpisciencesLogs();
- logger.info("Episciences logs Done");
- logger.info("Processing Pedocs Old Stats");
- piwikstatsdb.uploadOldPedocs();
- logger.info("Processing Pedocs Old Stats Done");
- logger.info("Processing TUDELFT Stats");
- piwikstatsdb.uploadTUDELFTStats();
- logger.info("Processing TUDELFT Stats Done");
- logger.info("Processing B2SHARE Stats");
- piwikstatsdb.uploadB2SHAREStats();
- logger.info("Processing B2SHARE Stats Done");
-
- }
-
- LaReferenciaStats lastats = new LaReferenciaStats();
-
- if (ExecuteWorkflow.processLaReferenciaLogs) {
- logger.info("Processing LaReferencia logs");
- lastats.processLogs();
- logger.info("LaReferencia logs done");
- }
-
- IrusStats irusstats = new IrusStats();
-
- if (ExecuteWorkflow.irusProcessStats) {
- logger.info("Processing IRUS");
- irusstats.processIrusStats();
- logger.info("Irus done");
- }
-
- SarcStats sarcStats = new SarcStats();
-
- if (ExecuteWorkflow.sarcProcessStats) {
- sarcStats.processSarc();
- }
- logger.info("Sarc done");
-
- // finalize usagestats
- if (ExecuteWorkflow.finalizeStats) {
- piwikstatsdb.finalizeStats();
- logger.info("Finalized stats");
- }
-
- // Make the tables available to Impala
- if (ExecuteWorkflow.finalTablesVisibleToImpala) {
- logger.info("Making tables visible to Impala");
- invalidateMetadata();
- }
-
- logger.info("End");
- }
-
- private void invalidateMetadata() throws SQLException {
- Statement stmt = null;
-
- stmt = ConnectDB.getImpalaConnection().createStatement();
-
- String sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".downloads_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".views_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".usage_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsageStatsDBSchema() + ".pageviews_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".downloads_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".views_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".usage_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".pageviews_stats";
- stmt.executeUpdate(sql);
-
- sql = "INVALIDATE METADATA " + ConnectDB.getUsagestatsPermanentDBSchema() + ".counter_r5_stats_with_metrics";
- stmt.executeUpdate(sql);
-
- stmt.close();
- ConnectDB.getHiveConnection().close();
- }
-}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/ConnectDB.java
similarity index 99%
rename from dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java
rename to dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/ConnectDB.java
index be7ce8afa..4952ea85b 100755
--- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/ConnectDB.java
@@ -14,7 +14,7 @@ import java.util.Calendar;
import java.util.Date;
/**
- * @author D. Pierrakos, S. Zoupanos
+ * @author D. Pierrakos
*/
import com.mchange.v2.c3p0.ComboPooledDataSource;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/EpisciencesViewsDownloads.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/EpisciencesViewsDownloads.java
new file mode 100755
index 000000000..8c7963a2c
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/EpisciencesViewsDownloads.java
@@ -0,0 +1,194 @@
+
+package eu.dnetlib.oa.graph.usagestatsbuild.export;
+
+import java.sql.*;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * @author D. Pierrakos
+ */
+public class EpisciencesViewsDownloads {
+
+ private String logPath;
+
+ private Statement stmt = null;
+
+ private static final Logger logger = LoggerFactory.getLogger(EpisciencesViewsDownloads.class);
+
+ public void processEpisciencesLogs() throws Exception {
+ try {
+
+ logger.info("Views Episciences processing starts at: " + new Timestamp(System.currentTimeMillis()));
+ episciencesViewsStats();
+ logger.info("Views Episciences processing ends at: " + new Timestamp(System.currentTimeMillis()));
+
+ logger.info("downloads Episciences processing starts at: " + new Timestamp(System.currentTimeMillis()));
+ episciencesDownloadsStats();
+ logger.info("Downloads Episciences processing ends at: " + new Timestamp(System.currentTimeMillis()));
+
+ } catch (Exception e) {
+ logger.error("Failed to process logs: " + e);
+ throw new Exception("Failed to process logs: " + e.toString(), e);
+ }
+ }
+
+ public void episciencesViewsStats() throws Exception {
+ logger.info("Creating episciences Views");
+
+ Statement stmt = ConnectDB.getHiveConnection().createStatement();
+ ConnectDB.getHiveConnection().setAutoCommit(false);
+
+ logger.info("Dropping Episcience Views Table");
+ String dropEpisciencesViewsTable = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ + ".episciencesviews ";
+ stmt.executeUpdate(dropEpisciencesViewsTable);
+ logger.info("Dropped Episcience Views Table");
+
+ logger.info("Creating Episcience Views Table");
+ String createEpisciencesViewsTable = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ + ".episciencesviews (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
+ + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
+
+ stmt.executeUpdate(createEpisciencesViewsTable);
+
+ String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
+ + ConnectDB.getStatsDBSchema() +
+ ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true) "
+ + "and substring(regexp_extract(websiteurl,'^([^\\\\.]+)\\\\.?',1),9)!='episciences'";
+
+ PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
+ .prepareStatement(returnEpisciencesJournals);
+ ResultSet rs = st.executeQuery();
+ while (rs.next()) {
+ String journal_openaire_id = rs.getString(1);
+ String episciencesSuffix = rs.getString(2);
+
+ logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);
+ logger.info("Dropping episciencesSuffix_result_views_monthly_tmp table");
+ String dropepisciencesSuffixView = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp";
+ // Statement stmtRS = ConnectDB.getHiveConnection().createStatement();
+ stmt.executeUpdate(dropepisciencesSuffixView);
+ logger.info("Dropped episciencesSuffix_result_views_monthly_tmp table");
+
+ logger.info("Creating episciencesSuffix_result_views_monthly_tmp table");
+
+ String create_result_views_monthly = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
+ + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp "
+ + "AS SELECT entity_id, "
+ + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
+ + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
+ + "AS openaire_referrer, "
+ + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
+ + "FROM " + ConnectDB.getUsageStatsDBSchema()
+ + ".episcienceslogdistinct where action='action' and (source_item_type='oaItem' or "
+ + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%' "
+ + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
+ + "source ORDER BY source, entity_id";
+ stmt.executeUpdate(create_result_views_monthly);
+ logger.info("Created episciencesSuffix_result_views_monthly_tmp table");
+
+ logger.info("Inserting episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
+ String insertIntoEpisciencesViewsTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ + ".episciencesviews SELECT 'Episciences' as source, '"
+ + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
+ + " max(views) AS count, max(openaire_referrer) AS openaire "
+ + "FROM " + ConnectDB.getUsageStatsDBSchema()
+ + "." + episciencesSuffix.replace("-", "_") + "_result_views_monthly_tmp p,"
+ + ConnectDB.getStatsDBSchema()
+ + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
+
+ stmt.executeUpdate(insertIntoEpisciencesViewsTable);
+ logger.info("Inserted episciencesSuffix_result_views_monthly_tmp into EpisciencesViews Table");
+
+ stmt.executeUpdate(dropepisciencesSuffixView);
+ logger.info("Dropped episciencesSuffix_result_views_monthly_tmp view");
+ }
+ rs.close();
+
+ logger.info("Episciences Views Created");
+ }
+
+ public void episciencesDownloadsStats() throws Exception {
+ logger.info("Creating episciences Downloads");
+
+ Statement stmt = ConnectDB.getHiveConnection().createStatement();
+ ConnectDB.getHiveConnection().setAutoCommit(false);
+
+ logger.info("Dropping Episcience Downloads Table");
+ String dropEpisciencesDownloadsTable = "DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ + ".episciencesdownloads ";
+ stmt.executeUpdate(dropEpisciencesDownloadsTable);
+ logger.info("Dropped Episcience Downloads Table");
+
+ logger.info("Creating Episcience Downloads Table");
+ String createEpisciencesDownloadsTable = "CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ + ".episciencesdownloads (source STRING, repository_id STRING, result_id STRING, date STRING, count INT, openaire INT)"
+ + " clustered by (source) into 100 buckets stored as orc tblproperties('transactional'='true') ";
+
+ stmt.executeUpdate(createEpisciencesDownloadsTable);
+
+ String returnEpisciencesJournals = "SELECT id, substring(regexp_extract(websiteurl,'^([^\\.]+)\\.?',1),9) FROM "
+ + ConnectDB.getStatsDBSchema() +
+ ".datasource where websiteurl like '%episciences%' and (dateofvalidation is not null or harvested=true) "
+ + "and substring(regexp_extract(websiteurl,'^([^\\\\.]+)\\\\.?',1),9)!='episciences'";
+
+ PreparedStatement st = ConnectDB.DB_HIVE_CONNECTION
+ .prepareStatement(returnEpisciencesJournals);
+ ResultSet rs = st.executeQuery();
+ while (rs.next()) {
+ String journal_openaire_id = rs.getString(1);
+ String episciencesSuffix = rs.getString(2);
+
+ logger.info("Working on journal_id:" + journal_openaire_id + " suffix:" + episciencesSuffix);
+ logger.info("Dropping episciencesSuffix_result_downloads_monthly_tmp table");
+ String dropepisciencesSuffixDownloads = "DROP VIEW IF EXISTS " + ConnectDB.getUsageStatsDBSchema()
+ + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp";
+ stmt.executeUpdate(dropepisciencesSuffixDownloads);
+
+ logger.info("Creating episciencesSuffix_result_downloads_monthly_tmp table");
+
+ String create_result_downloads_monthly = "CREATE OR REPLACE VIEW " + ConnectDB.getUsageStatsDBSchema()
+ + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp "
+ + "AS SELECT entity_id, "
+ + "reflect('java.net.URLDecoder', 'decode', entity_id) AS id,"
+ + "COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) "
+ + "AS openaire_referrer, "
+ + "CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source "
+ + "FROM " + ConnectDB.getUsageStatsDBSchema()
+ + ".episcienceslogdistinct where action='download' and (source_item_type='oaItem' or "
+ + "source_item_type='repItem') and entity_id like '%" + episciencesSuffix + "%' "
+ + "GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), "
+ + "source ORDER BY source, entity_id";
+
+ stmt.executeUpdate(create_result_downloads_monthly);
+ logger.info("Created episciencesSuffix_result_downloads_monthly_tmp table");
+
+ logger.info("Inserting episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
+ String insertIntoEpisciencesDownloadsTable = "INSERT INTO " + ConnectDB.getUsageStatsDBSchema()
+ + ".episciencesdownloads SELECT 'Episciences' as source, '"
+ + journal_openaire_id + "' as repository_id, ro.id as result_id, month as date,"
+ + " max(views) AS count, max(openaire_referrer) AS openaire "
+ + "FROM " + ConnectDB.getUsageStatsDBSchema()
+ + "." + episciencesSuffix.replace("-", "_") + "_result_downloads_monthly_tmp p,"
+ + ConnectDB.getStatsDBSchema()
+ + ".result_oids ro WHERE p.id=ro.oid GROUP BY ro.id, month ORDER BY ro.id, month";
+
+ stmt.executeUpdate(insertIntoEpisciencesDownloadsTable);
+ logger.info("Inserted episciencesSuffix_result_downloads_monthly_tmp into EpisciencesDownloadsTable");
+
+ stmt.executeUpdate(dropepisciencesSuffixDownloads);
+ logger.info("Dropped episciencesSuffix_result_downloads_monthly_tmp view");
+
+ }
+ rs.close();
+ }
+
+ private Connection getConnection() throws SQLException {
+ return ConnectDB.getHiveConnection();
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/ExecuteWorkflow.java
new file mode 100755
index 000000000..825199709
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/ExecuteWorkflow.java
@@ -0,0 +1,55 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package eu.dnetlib.oa.graph.usagestatsbuild.export;
+
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.log4j.BasicConfigurator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * @author D. Pierrakos
+ */
+public class ExecuteWorkflow {
+
+ static String dbHiveUrl;
+ static String dbImpalaUrl;
+ static String usageRawDataDBSchema;
+ static String usageStatsDBSchema;
+ static String usagestatsPermanentDBSchema;
+ static String statsDBSchema;
+
+ public static void main(String args[]) throws Exception {
+
+ // Sending the logs to the console
+ BasicConfigurator.configure();
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+ IOUtils
+ .toString(
+ UsageStatsExporter.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/usagestatsupdate/export/usagestatsupdate_parameters.json")));
+ parser.parseArgument(args);
+
+ dbHiveUrl = parser.get("dbHiveUrl");
+ dbImpalaUrl = parser.get("dbImpalaUrl");
+ usageRawDataDBSchema = parser.get("usageRawDataDB");
+ usageStatsDBSchema = parser.get("usageStatsDB");
+ usagestatsPermanentDBSchema = parser.get("usagestatsPermanentDB");
+ statsDBSchema = parser.get("statsDB");
+
+ UsageStatsExporter usagestatsExport = new UsageStatsExporter();
+ usagestatsExport.export();
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/UsageStatsExporter.java
new file mode 100755
index 000000000..6474970ad
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usagestatsupdate/export/UsageStatsExporter.java
@@ -0,0 +1,37 @@
+
+package eu.dnetlib.oa.graph.usagestatsbuild.export;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Main class for downloading and processing Usage statistics
+ *
+ * @author D. Pierrakos
+ */
+public class UsageStatsExporter {
+
+ public UsageStatsExporter() {
+
+ }
+
+ private static final Logger logger = LoggerFactory.getLogger(UsageStatsExporter.class);
+
+ public void export() throws Exception {
+
+ logger.info("Initialising DB properties");
+ ConnectDB.init();
+
+ EpisciencesViewsDownloads episciencesViewsDownloads = new EpisciencesViewsDownloads();
+ logger.info("Processing Episciences logs");
+ episciencesViewsDownloads.processEpisciencesLogs();
+ logger.info("Episciences logs Done");
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/export/usagestatsbuild_parameters.json b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/export/usagestatsbuild_parameters.json
deleted file mode 100755
index 242e5a477..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/export/usagestatsbuild_parameters.json
+++ /dev/null
@@ -1,92 +0,0 @@
-[
- {
- "paramName": "pmi",
- "paramLongName": "portalMatomoID",
- "paramDescription": "namoNode of the target cluster",
- "paramRequired": true
- },
- {
- "paramName": "dbhu",
- "paramLongName": "dbHiveUrl",
- "paramDescription": "activate tranform-only mode. Only apply transformation step",
- "paramRequired": true
- },
- {
- "paramName": "dbiu",
- "paramLongName": "dbImpalaUrl",
- "paramDescription": "activate tranform-only mode. Only apply transformation step",
- "paramRequired": true
- },
- {
- "paramName": "urdbs",
- "paramLongName": "usageRawDataDBSchema",
- "paramDescription": "activate tranform-only mode. Only apply transformation step",
- "paramRequired": true
- },
- {
- "paramName": "usdbs",
- "paramLongName": "usageStatsDBSchema",
- "paramDescription": "activate tranform-only mode. Only apply transformation step",
- "paramRequired": true
- },
- {
- "paramName": "sdbs",
- "paramLongName": "statsDBSchema",
- "paramDescription": "activate tranform-only mode. Only apply transformation step",
- "paramRequired": true
- },
- {
- "paramName": "uspdbs",
- "paramLongName": "usagestatsPermanentDBSchema",
- "paramDescription": "activate tranform-only mode. Only apply transformation step",
- "paramRequired": true
- },
- {
- "paramName": "rdbt",
- "paramLongName": "recreateDbAndTables",
- "paramDescription": "Re-create database and initial tables?",
- "paramRequired": true
- },
- {
- "paramName": "ppwl",
- "paramLongName": "processPiwikLogs",
- "paramDescription": "Process the piwiklogs (create & fill in the needed tables and process the data) based on the downloaded data",
- "paramRequired": true
- },
- {
- "paramName": "plrl",
- "paramLongName": "processLaReferenciaLogs",
- "paramDescription": "Process the La Referencia logs (create & fill in the needed tables and process the data) based on the downloaded data",
- "paramRequired": true
- },
- {
- "paramName": "ipr",
- "paramLongName": "irusProcessStats",
- "paramDescription": "Irus section: Process stats?",
- "paramRequired": true
- },
- {
- "paramName": "ipr",
- "paramLongName": "sarcProcessStats",
- "paramDescription": "Sarc section: Process stats?",
- "paramRequired": true
- },
- {
- "paramName": "fs",
- "paramLongName": "finalizeStats",
- "paramDescription": "Create the usage_stats table?",
- "paramRequired": true
- },
- {
- "paramName": "ftvi",
- "paramLongName": "finalTablesVisibleToImpala",
- "paramDescription": "Make the usage_stats, views_stats and downloads_stats tables visible to Impala",
- "paramRequired": true
- },
- {
- "paramName": "nodt",
- "paramLongName": "numberOfDownloadThreads",
- "paramDescription": "Number of download threads",
- "paramRequired": true
- }
-]
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/oozie_app/workflow.xml
deleted file mode 100755
index 488578b24..000000000
--- a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/oozie_app/workflow.xml
+++ /dev/null
@@ -1,83 +0,0 @@
-
-
-
- hiveMetastoreUris
- Hive server metastore URIs
-
-
- hiveJdbcUrl
- Hive server jdbc url
-
-
- impalaJdbcUrl
- Impala server jdbc url
-
-
-
-
- ${jobTracker}
- ${nameNode}
-
-
- hive.metastore.uris
- ${hiveMetastoreUris}
-
-
- mapreduce.job.queuename
- ${queueName}
-
-
- oozie.launcher.mapred.job.queue.name
- ${oozieLauncherQueueName}
-
-
- spark.executor.memory
- 19166291558
-
-
- spark.yarn.executor.memoryOverhead
- 3225
-
-
- spark.driver.memory
- 11596411699
-
-
- spark.yarn.driver.memoryOverhead
- 1228
-
-
-
-
-
-
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
-
-
- eu.dnetlib.oa.graph.usagestatsbuild.export.ExecuteWorkflow
- --portalMatomoID${portalMatomoID}
- --dbHiveUrl${hiveJdbcUrl}
- --dbImpalaUrl${impalaJdbcUrl}
- --usageRawDataDBSchema${usageRawDataDBSchema}
- --usageStatsDBSchema${usageStatsDBSchema}
- --usagestatsPermanentDBSchema${usagestatsPermanentDBSchema}
- --statsDBSchema${statsDBSchema}
- --recreateDbAndTables${recreateDbAndTables}
- --processPiwikLogs${processPiwikLogs}
- --processLaReferenciaLogs${processLaReferenciaLogs}
- --irusProcessStats${irusProcessStats}
- --sarcProcessStats${sarcProcessStats}
- --finalizeStats${finalizeStats}
- --finalTablesVisibleToImpala${finalTablesVisibleToImpala}
- --numberOfDownloadThreads${numberOfDownloadThreads}
-
-
-
-
-
-
-
-
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/export/usagestatsupdate_parameters.json b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/export/usagestatsupdate_parameters.json
new file mode 100755
index 000000000..f6238f48c
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/export/usagestatsupdate_parameters.json
@@ -0,0 +1,44 @@
+[
+ {
+ "paramName": "pmi",
+ "paramLongName": "portalMatomoID",
+ "paramDescription": "OpenAIRE Explore Matomo",
+ "paramRequired": true
+ },
+ {
+ "paramName": "dbhu",
+ "paramLongName": "dbHiveUrl",
+ "paramDescription": "HIVE URL",
+ "paramRequired": true
+ },
+ {
+ "paramName": "dbiu",
+ "paramLongName": "dbImpalaUrl",
+ "paramDescription": "Impala URL",
+ "paramRequired": true
+ },
+ {
+ "paramName": "urdbs",
+ "paramLongName": "usageRawDataDB",
+ "paramDescription": "Usage Raw DB",
+ "paramRequired": true
+ },
+ {
+ "paramName": "usdbs",
+ "paramLongName": "usageStatsDB",
+ "paramDescription": "Usage Stats DB",
+ "paramRequired": true
+ },
+ {
+ "paramName": "sdbs",
+ "paramLongName": "statsDB",
+ "paramDescription": "Stats DB",
+ "paramRequired": true
+ },
+ {
+ "paramName": "uspdbs",
+ "paramLongName": "usagestatsPermanentDB",
+ "paramDescription": "Shadow Usage Stats DB",
+ "paramRequired": true
+ }
+]
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/oozie_app/config-default.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/config-default.xml
similarity index 100%
rename from dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsbuild/oozie_app/config-default.xml
rename to dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/config-default.xml
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/invalidate_metadata.sh b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/invalidate_metadata.sh
new file mode 100644
index 000000000..db8d39af2
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/invalidate_metadata.sh
@@ -0,0 +1,21 @@
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+if ! [ -L $link_folder ]
+then
+ rm -Rf "$link_folder"
+ ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
+fi
+
+export SOURCE=$1
+export TARGET=$2
+export SHADOW=$3
+
+impala-shell -q "invalidate metadata;"
+impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f -
+echo "Impala shell finished"
+
+echo "Updating shadow observatory database"
+impala-shell -q "create database if not exists ${SHADOW}"
+impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -f -
+impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${TARGET}.\1;/" | impala-shell -f -
+echo "Shadow db ready!"
\ No newline at end of file
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step1.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step1.sql
new file mode 100644
index 000000000..3e86ddc07
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step1.sql
@@ -0,0 +1,8 @@
+--------------------------------------------------------------
+--------------------------------------------------------------
+-- Usage Stats database creation
+--------------------------------------------------------------
+--------------------------------------------------------------
+
+DROP database IF EXISTS ${usageStatsDB} CASCADE;
+CREATE database ${usageStatsDB};
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step10.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step10.sql
new file mode 100644
index 000000000..0f3dbf6ed
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step10.sql
@@ -0,0 +1,10 @@
+-- LaReferencia Distinct
+DROP TABLE IF EXISTS ${usageStatsDB}.lareferencialogdistinct;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.lareferencialogdistinct(matomoid INT, source INT, id_visit STRING, country STRING, action STRING, url STRING,
+entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING)
+clustered by (source, id_visit, action, timestamp, entity_id)
+into 100 buckets stored as orc tblproperties('transactional'='true');
+
+INSERT INTO ${usageStatsDB}.lareferencialogdistinct
+SELECT DISTINCT * FROM ${usageRawDataDB}.lareferencialog WHERE entity_id is not null;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step11.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step11.sql
new file mode 100644
index 000000000..c736124d0
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step11.sql
@@ -0,0 +1,18 @@
+--LaReferencia views
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.la_result_views_monthly_tmp AS
+SELECT entity_id AS id, COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%'
+THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.lareferencialogdistinct where action='action' and
+(source_item_type='oaItem' or source_item_type='repItem')
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.la_views_stats_tmp;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.la_views_stats_tmp
+AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(views) AS count, max(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.la_result_views_monthly_tmp p, ${statsDB}.datasource_oids d,
+${statsDB}.result_oids ro WHERE p.source=d.oid AND p.id=ro.oid GROUP BY d.id, ro.id, month;
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step12.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step12.sql
new file mode 100644
index 000000000..7e3d1cd82
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step12.sql
@@ -0,0 +1,17 @@
+--LaReferencia downloads
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.la_result_downloads_monthly_tmp AS
+SELECT entity_id AS id, COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%'
+THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.lareferencialogdistinct where action='download' and
+(source_item_type='oaItem' or source_item_type='repItem')
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.la_downloads_stats_tmp;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.la_downloads_stats_tmp
+AS SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(downloads) AS count, max(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.la_result_downloads_monthly_tmp p, ${statsDB}.datasource_oids d,
+${statsDB}.result_oids ro WHERE p.source=d.oid AND p.id=ro.oid GROUP BY d.id, ro.id, month;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step13.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step13.sql
new file mode 100644
index 000000000..e6b7ca2e5
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step13.sql
@@ -0,0 +1,112 @@
+--LaReferencia CoP R5
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.lr_view_unique_item_investigations
+AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.lareferencialogdistinct WHERE (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL GROUP BY id_visit, entity_id,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.lr_tbl_unique_item_investigations;
+
+CREATE TABLE ${usageStatsDB}.lr_tbl_unique_item_investigations as
+SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(unique_item_investigations) AS unique_item_investigations, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.lr_view_unique_item_investigations p, ${statsDB}.datasource d,
+${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.lr_view_total_item_investigations
+AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) AS total_item_investigations,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.lareferencialogdistinct
+WHERE (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL GROUP BY id_visit, entity_id,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.lr_tbl_total_item_investigations;
+
+CREATE TABLE ${usageStatsDB}.lr_tbl_total_item_investigations as
+SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(total_item_investigations) AS total_item_investigations, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.lr_view_total_item_investigations p, ${statsDB}.datasource d,
+${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.lr_view_unique_item_requests AS
+SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.lareferencialogdistinct
+WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL GROUP BY id_visit, entity_id,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.lr_tbl_unique_item_requests;
+
+CREATE TABLE ${usageStatsDB}.lr_tbl_unique_item_requests as
+SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(unique_item_requests) AS unique_item_requests, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.lr_view_unique_item_requests p, ${statsDB}.datasource d,${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.lr_view_total_item_requests
+AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) AS total_item_requests,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.lareferencialogdistinct
+WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL
+GROUP BY id_visit, entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.lr_tbl_total_item_requests;
+
+CREATE TABLE ${usageStatsDB}.lr_tbl_total_item_requests as
+SELECT 'LaReferencia' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(total_item_requests) AS total_item_requests, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.lr_view_total_item_requests p, ${statsDB}.datasource d,
+${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.lr_tbl_all_r5_metrics;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.lr_tbl_all_r5_metrics as
+WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id,
+coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date,
+coalesce(vs.unique_item_investigations, 0) as unique_item_investigations, coalesce(ds.total_item_investigations, 0) as total_item_investigations
+FROM ${usageStatsDB}.lr_tbl_unique_item_investigations AS vs
+FULL OUTER JOIN
+${usageStatsDB}.lr_tbl_total_item_investigations AS ds
+ON ds.source=vs.source AND ds.result_id=vs.result_id AND ds.date=vs.date),
+tmp2 AS (select coalesce (ds.repository_id, vs.repository_id) as repository_id,
+coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date,
+coalesce(ds.total_item_investigations, 0) as total_item_investigations,
+coalesce(ds.unique_item_investigations, 0) as unique_item_investigations,
+coalesce(vs.unique_item_requests, 0) as unique_item_requests FROM tmp1 AS ds
+FULL OUTER JOIN
+${usageStatsDB}.lr_tbl_unique_item_requests AS vs
+ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date)
+SELECT 'LaReferencia' as source, coalesce (ds.repository_id, vs.repository_id) as repository_id,
+coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date,
+coalesce(ds.unique_item_investigations, 0) as unique_item_investigations,
+coalesce(ds.total_item_investigations, 0) as total_item_investigations,
+coalesce(ds.unique_item_requests, 0) as unique_item_requests,
+coalesce(vs.total_item_requests, 0) as total_item_requests FROM tmp2 AS ds
+FULL OUTER JOIN
+${usageStatsDB}.lr_tbl_total_item_requests AS vs ON ds.repository_id=vs.repository_id
+AND ds.result_id=vs.result_id AND ds.date=vs.date;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step14.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step14.sql
new file mode 100644
index 000000000..562918456
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step14.sql
@@ -0,0 +1,31 @@
+--IRUS Stats
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.irus_downloads_stats_tmp
+(`source` string,
+`repository_id` string,
+`result_id` string,
+`date` string,
+`count` bigint,
+`openaire` bigint);
+
+INSERT INTO ${usageStatsDB}.irus_downloads_stats_tmp
+SELECT s.source, d.id AS repository_id,
+ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date, s.count, '0'
+FROM ${usageRawDataDB}.sushilog s, ${statsDB}.datasource_oids d, ${statsDB}.result_oids ro
+WHERE s.repository=d.oid AND s.rid=ro.oid AND metric_type='ft_total' AND s.source='IRUS-UK';
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.irus_R5_stats_tmp
+(`source` string,
+`repository_id` string,
+`result_id` string,
+`date` string,
+`views` bigint,
+`downloads` bigint,
+`openaire` bigint);
+
+INSERT INTO ${usageStatsDB}.irus_R5_stats_tmp
+SELECT s.source, d.id AS repository_id,
+ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date,
+(s.total_item_investigations-s.total_item_requests) as views, s.total_item_requests as downloads, '0'
+FROM ${usageRawDataDB}.sushilog_cop_r5 s, ${statsDB}.datasource_oids d, ${statsDB}.result_oids ro
+WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK';
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step15.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step15.sql
new file mode 100644
index 000000000..c4ce5a4c7
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step15.sql
@@ -0,0 +1,18 @@
+--SARC Downloads
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.sarc_downloads_stats_tmp
+(`source` string,
+`repository_id` string,
+`result_id` string,
+`date` string,
+`count` bigint,
+`openaire` bigint);
+
+INSERT INTO ${usageStatsDB}.sarc_downloads_stats_tmp
+SELECT s.source, d.id AS repository_id,
+ro.id as result_id, CONCAT(CAST(YEAR(`date`) AS STRING), '/',
+LPAD(CAST(MONTH(`date`) AS STRING), 2, '0')) AS `date`, s.count, '0'
+FROM ${usageRawDataDB}.sushilog s, ${statsDB}.datasource_oids d, ${statsDB}.result_pids ro
+WHERE d.oid LIKE CONCAT('%', s.repository, '%') AND d.id like CONCAT('%', 'sarcservicod', '%')
+AND s.rid=ro.pid AND ro.type='Digital Object Identifier' AND s.metric_type='ft_total' AND s.source='SARC-OJS';
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step16.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step16.sql
new file mode 100644
index 000000000..f0f95a5d1
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step16.sql
@@ -0,0 +1,131 @@
+--Finalize USAGE Stats
+
+DROP TABLE IF EXISTS ${usageStatsDB}.views_stats;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.downloads_stats;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.pageviews_stats;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.usage_stats;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.project_stats;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.download_stats;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.views_stats
+LIKE ${usageStatsDB}.openaire_views_stats_tmp STORED AS PARQUET;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT * FROM ${usageStatsDB}.openaire_views_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT * FROM ${usageStatsDB}.episciencesviews;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT * FROM ${usageStatsDB}.pedocs_views_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT * FROM ${usageStatsDB}.tudelft_views_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT * FROM ${usageStatsDB}.la_views_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT * FROM ${usageStatsDB}.b2share_views_stats_tmp;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.downloads_stats
+LIKE ${usageStatsDB}.openaire_downloads_stats_tmp STORED AS PARQUET;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.openaire_downloads_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.episciencesdownloads;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.pedocs_downloads_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.tudelft_downloads_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.b2share_downloads_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.la_downloads_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.irus_downloads_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT source, repository_id, result_id, `date`, views, openaire FROM ${usageStatsDB}.irus_R5_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT source, repository_id, result_id, `date`, downloads, openaire FROM ${usageStatsDB}.irus_R5_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.sarc_downloads_stats_tmp;
+
+INSERT INTO ${usageStatsDB}.views_stats
+SELECT * FROM ${usageStatsDB}.datacite_views;
+
+INSERT INTO ${usageStatsDB}.downloads_stats
+SELECT * FROM ${usageStatsDB}.datacite_downloads;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.pageviews_stats
+LIKE ${usageStatsDB}.openaire_pageviews_stats_tmp STORED AS PARQUET;
+
+INSERT INTO ${usageStatsDB}.pageviews_stats
+SELECT * FROM ${usageStatsDB}.openaire_pageviews_stats_tmp;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.usage_stats AS
+SELECT coalesce(ds.source, vs.source) as source,
+coalesce(ds.repository_id, vs.repository_id) as repository_id,
+coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date,
+coalesce(ds.count, 0) as downloads, coalesce(vs.count, 0) as views,
+coalesce(ds.openaire, 0) as openaire_downloads,
+coalesce(vs.openaire, 0) as openaire_views
+FROM ${usageStatsDB}.downloads_stats AS ds
+FULL OUTER JOIN ${usageStatsDB}.views_stats AS vs ON ds.source=vs.source
+AND ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.project_stats AS
+with project_views as (select id, sum(views) views, sum(openaire_views) openaire_views,`date`
+from ${usageStatsDB}.usage_stats
+join ${statsDB}.project_results on result_id=result group by id,`date`),
+project_downloads as
+(select id, sum(downloads) downloads,sum(openaire_downloads) openaire_downloads,`date`
+from ${usageStatsDB}.usage_stats
+join ${statsDB}.project_results on result_id=result group by id,`date`)
+SELECT coalesce(pv.id, pd.id) as id, coalesce(pd.`date`, pv.`date`) as `date`,
+coalesce(pv.views, 0) as views, coalesce(pd.downloads, 0) as downloads,
+coalesce(pv.openaire_views, 0) as openaire_views,coalesce(pd.openaire_downloads, 0) as openaire_downloads
+FROM project_downloads pd FULL OUTER JOIN project_views pv ON pd.id=pv.id WHERE pd.`date`=pv.`date`;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.datasource_stats AS
+with datasource_views as
+(select repository_id, sum(views) views, sum(openaire_views) openaire_views,`date`
+from ${usageStatsDB}.usage_stats group by repository_id,`date`),
+datasource_downloads as
+(select repository_id, sum(downloads) downloads,sum(openaire_downloads) openaire_downloads,`date`
+from ${usageStatsDB}.usage_stats group by repository_id,`date`)
+SELECT coalesce(dv.repository_id, dd.repository_id) as repositor_id,
+coalesce(dd.`date`, dv.`date`) as `date`, coalesce(dv.views, 0) as views,
+coalesce(dd.downloads, 0) as downloads,
+coalesce(dv.openaire_views, 0) as openaire_views,coalesce(dd.openaire_downloads, 0) as openaire_downloads
+FROM datasource_downloads dd FULL OUTER JOIN
+datasource_views dv ON dd.repository_id=dv.repository_id WHERE dd.`date`=dv.`date`;
+
+
+INSERT INTO ${usageStatsDB}.tbl_all_r5_metrics
+SELECT * FROM ${usageStatsDB}.lr_tbl_all_r5_metrics;
+
+INSERT INTO ${usageStatsDB}.tbl_all_r5_metrics
+SELECT s.source, d.id AS repository_id,
+ro.id as result_id, CONCAT(YEAR(date), '/', LPAD(MONTH(date), 2, '0')) as date,
+s.unique_item_investigations , s.total_item_investigations,
+s.unique_item_requests, s.total_item_requests
+FROM ${usageStatsDB}.sushilog_cop_r5 s, ${statsDB}.datasource_oids d, ${statsDB}.result_oids ro
+WHERE s.repository=d.oid AND s.rid=ro.oid AND s.source='IRUS-UK';
+
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step17.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step17.sql
new file mode 100644
index 000000000..390e715de
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step17.sql
@@ -0,0 +1,39 @@
+-- Shadow DB
+DROP database IF EXISTS ${usagestatsPermanentDBSchema} CASCADE;
+CREATE database ${usagestatsPermanentDBSchema};
+
+DROP VIEW IF EXISTS ${usagestatsPermanentDBSchema}.views_stats;
+
+CREATE VIEW IF NOT EXISTS ${usagestatsPermanentDBSchema}.views_stats
+AS SELECT * FROM ${usageStatsDB}.views_stats;
+
+DROP VIEW IF EXISTS ${usagestatsPermanentDBSchema}.pageviews_stats;
+
+CREATE VIEW IF NOT EXISTS ${usagestatsPermanentDBSchema}.pageviews_stats
+AS SELECT * FROM ${usageStatsDB}.pageviews_stats;
+
+DROP VIEW IF EXISTS ${usagestatsPermanentDBSchema}.downloads_stats;
+
+CREATE VIEW IF NOT EXISTS ${usagestatsPermanentDBSchema}.downloads_stats
+AS SELECT * FROM ${usageStatsDB}.downloads_stats;
+
+DROP VIEW IF EXISTS ${usagestatsPermanentDBSchema}.usage_stats;
+
+CREATE VIEW IF NOT EXISTS ${usagestatsPermanentDBSchema}.usage_stats
+AS SELECT * FROM ${usageStatsDB}.usage_stats;
+
+DROP VIEW IF EXISTS ${usagestatsPermanentDBSchema}.project_stats;
+
+CREATE VIEW IF NOT EXISTS ${usagestatsPermanentDBSchema}.project_stats
+AS SELECT * FROM ${usageStatsDB}.project_stats;
+
+DROP VIEW IF EXISTS ${usagestatsPermanentDBSchema}.datasource_stats;
+
+CREATE VIEW IF NOT EXISTS ${usagestatsPermanentDBSchema}.datasource_stats
+AS SELECT * FROM ${usageStatsDB}.datasource_stats;
+
+DROP VIEW IF EXISTS ${usagestatsPermanentDBSchema}.counter_r5_stats_with_metrics;
+
+CREATE VIEW IF NOT EXISTS ${usagestatsPermanentDBSchema}.counter_r5_stats_with_metrics
+AS SELECT * FROM ${usageStatsDB}.tbl_all_r5_metrics;
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step18.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step18.sql
new file mode 100644
index 000000000..02401fbe1
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step18.sql
@@ -0,0 +1,18 @@
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+if ! [ -L $link_folder ]
+then
+ rm -Rf "$link_folder"
+ ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
+fi
+
+SHADOW=$1
+
+impala-shell -i impala-cluster-dn1.openaire.eu -q "INVALIDATE METADATA ${SHADOW}.views_stats;"
+impala-shell -i impala-cluster-dn1.openaire.eu -q "INVALIDATE METADATA ${SHADOW}.pageviews_stats;"
+impala-shell -i impala-cluster-dn1.openaire.eu -q "INVALIDATE METADATA ${SHADOW}.downloads_stats;"
+impala-shell -i impala-cluster-dn1.openaire.eu -q "INVALIDATE METADATA ${SHADOW}.usage_stats;"
+impala-shell -i impala-cluster-dn1.openaire.eu -q "INVALIDATE METADATA ${SHADOW}.counter_r5_stats_with_metrics;"
+impala-shell -i impala-cluster-dn1.openaire.eu -q "INVALIDATE METADATA ${SHADOW}.project_stats;"
+impala-shell -i impala-cluster-dn1.openaire.eu -q "INVALIDATE METADATA ${SHADOW}.datasource_stats;"
+
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step2.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step2.sql
new file mode 100644
index 000000000..49213c8f0
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step2.sql
@@ -0,0 +1,7 @@
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.piwiklogdistinct(source INT, id_visit STRING, country STRING,
+action STRING, url STRING, entity_id STRING, source_item_type STRING, timestamp STRING,
+referrer_name STRING, agent STRING) clustered by (source, id_visit, action, timestamp, entity_id)
+into 100 buckets stored as orc tblproperties('transactional'='true');
+
+INSERT INTO ${usageStatsDB}.piwiklogdistinct
+SELECT DISTINCT * FROM ${usageRawDataDB}.piwiklog WHERE entity_id is not null;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step3.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step3.sql
new file mode 100644
index 000000000..8f0d1a276
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step3.sql
@@ -0,0 +1,28 @@
+--OpenAIRE Views
+
+DROP VIEW IF EXISTS ${usageStatsDB}.openaire_result_views_monthly_tmp;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.openaire_result_views_monthly_tmp
+AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END)
+AS openaire_referrer, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct where action='action'
+and (source_item_type='oaItem' or source_item_type='repItem')
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.openaire_views_stats_tmp;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.openaire_views_stats_tmp
+AS SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(views) AS count, max(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.openaire_result_views_monthly_tmp p, ${statsDB}.datasource d,
+${statsDB}.result_oids ro WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200'
+AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503'
+AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' GROUP BY d.id, ro.id, month;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.openaire_pageviews_stats_tmp AS SELECT
+'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date, max(views) AS count
+FROM ${usageStatsDB}.openaire_result_views_monthly_tmp p, ${statsDB}.datasource d, ${statsDB}.result_oids ro
+WHERE p.source=${portalMatomoID} AND p.source=d.piwik_id and p.id=ro.oid AND ro.oid!='200'
+AND ro.oid!='204' AND ro.oid!='404' AND ro.oid!='400' AND ro.oid!='503'
+AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6' GROUP BY d.id, ro.id, month;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step4.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step4.sql
new file mode 100644
index 000000000..3ecdc0c8c
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step4.sql
@@ -0,0 +1,24 @@
+--OpenAIRE Downloads
+
+DROP VIEW IF EXISTS ${usageStatsDB}.openaire_result_downloads_monthly_tmp;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.openaire_result_downloads_monthly_tmp
+AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) as downloads, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END)
+AS openaire_referrer, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct where action='download'
+AND (source_item_type='oaItem' OR source_item_type='repItem')
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) , source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.openaire_downloads_stats_tmp;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.openaire_downloads_stats_tmp AS
+SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(downloads) AS count, max(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.openaire_result_downloads_monthly_tmp p,
+${statsDB}.datasource d, ${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id and p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+DROP VIEW IF EXISTS ${usageStatsDB}.openaire_result_downloads_monthly_tmp;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step5.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step5.sql
new file mode 100644
index 000000000..ce0e57eab
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step5.sql
@@ -0,0 +1,108 @@
+--CoP R5
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.view_unique_item_investigations
+AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_investigations,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct
+WHERE (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL GROUP BY id_visit, entity_id,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.tbl_unique_item_investigations;
+
+CREATE TABLE ${usageStatsDB}.tbl_unique_item_investigations as
+SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(unique_item_investigations) AS unique_item_investigations, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.view_unique_item_investigations p, ${statsDB}.datasource d, ${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.view_total_item_investigations
+AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) AS total_item_investigations,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct WHERE (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL GROUP BY id_visit, entity_id,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.tbl_total_item_investigations;
+
+CREATE TABLE ${usageStatsDB}.tbl_total_item_investigations AS
+SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(total_item_investigations) AS total_item_investigations, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.view_total_item_investigations p, ${statsDB}.datasource d,${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.view_unique_item_requests AS
+SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+CASE WHEN COUNT(entity_id)>1 THEN 1 ELSE 1 END AS unique_item_requests,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct
+WHERE action='download' AND (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL GROUP BY id_visit, entity_id,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.tbl_unique_item_requests;
+
+CREATE TABLE ${usageStatsDB}.tbl_unique_item_requests as
+SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(unique_item_requests) AS unique_item_requests, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.view_unique_item_requests p, ${statsDB}.datasource d,${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.view_total_item_requests
+AS SELECT id_visit, entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) AS total_item_requests,
+SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct WHERE action='download'
+AND (source_item_type='oaItem' or source_item_type='repItem')
+AND entity_id is NOT NULL GROUP BY id_visit, entity_id,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.tbl_total_item_requests;
+
+CREATE TABLE ${usageStatsDB}.tbl_total_item_requests as
+SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+sum(total_item_requests) AS total_item_requests, sum(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.view_total_item_requests p, ${statsDB}.datasource d, ${statsDB}.result_oids ro
+WHERE p.source=d.piwik_id AND p.id=ro.oid AND ro.oid!='200' AND ro.oid!='204' AND ro.oid!='404'
+AND ro.oid!='400' AND ro.oid!='503' AND d.id!='re3data_____::7b0ad08687b2c960d5aeef06f811d5e6'
+GROUP BY d.id, ro.id, month;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.tbl_all_r5_metrics;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.tbl_all_r5_metrics as
+WITH tmp1 as (SELECT coalesce(ds.repository_id, vs.repository_id) as repository_id,
+coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date,
+coalesce(vs.unique_item_investigations, 0) as unique_item_investigations,
+coalesce(ds.total_item_investigations, 0) as total_item_investigations
+FROM ${usageStatsDB}.tbl_unique_item_investigations AS vs
+FULL OUTER JOIN
+${usageStatsDB}.tbl_total_item_investigations AS ds
+ON ds.source=vs.source AND ds.result_id=vs.result_id AND ds.date=vs.date),
+tmp2 AS (select coalesce (ds.repository_id, vs.repository_id) as repository_id,
+coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date,
+coalesce(ds.total_item_investigations, 0) as total_item_investigations,
+coalesce(ds.unique_item_investigations, 0) as unique_item_investigations,
+coalesce(vs.unique_item_requests, 0) as unique_item_requests FROM tmp1
+AS ds FULL OUTER JOIN ${usageStatsDB}.tbl_unique_item_requests AS vs
+ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date)
+SELECT 'OpenAIRE' as source, coalesce (ds.repository_id, vs.repository_id) as repository_id,
+coalesce(ds.result_id, vs.result_id) as result_id, coalesce(ds.date, vs.date) as date,
+coalesce(ds.unique_item_investigations, 0) as unique_item_investigations,
+coalesce(ds.total_item_investigations, 0) as total_item_investigations,
+coalesce(ds.unique_item_requests, 0) as unique_item_requests,
+coalesce(vs.total_item_requests, 0) as total_item_requests
+FROM tmp2 AS ds FULL OUTER JOIN ${usageStatsDB}.tbl_total_item_requests
+AS vs ON ds.repository_id=vs.repository_id AND ds.result_id=vs.result_id AND ds.date=vs.date;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step6.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step6.sql
new file mode 100644
index 000000000..4fe9e7881
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step6.sql
@@ -0,0 +1,13 @@
+--Episciences log
+
+DROP TABLE IF EXISTS ${usageStatsDB}.episcienceslogdistinct;
+
+CREATE TABLE IF NOT EXISTS
+${usageStatsDB}.episcienceslogdistinct(source INT, id_visit STRING,
+country STRING, action STRING, url STRING,
+entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING)
+clustered by (source, id_visit, action, timestamp, entity_id)
+into 100 buckets stored as orc tblproperties('transactional'='true');
+
+INSERT INTO ${usageStatsDB}.episcienceslogdistinct
+SELECT DISTINCT * FROM ${usageStatsDB}.episcienceslog WHERE entity_id is not null;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step7.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step7.sql
new file mode 100644
index 000000000..c60985608
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step7.sql
@@ -0,0 +1,15 @@
+--PeDOCS old data
+
+DROP TABLE IF EXISTS ${usageStatsDB}.pedocs_views_stats_tmp;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.pedocs_downloads_stats_tmp;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.pedocs_views_stats_tmp AS
+SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,
+r.id as result_id,date,counter_abstract as count, 0 as openaire
+FROM ${usageRawDataDB}.pedocsoldviews p, ${statsDB}.result_oids r where r.oid=p.identifier;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.pedocs_downloads_stats_tmp AS
+SELECT 'OpenAIRE' as source, 'opendoar____::ab1a4d0dd4d48a2ba1077c4494791306' as repository_id,
+r.id as result_id, date, counter as count, 0 as openaire
+FROM ${usageRawDataDB}.pedocsolddownloads p, ${statsDB}.result_oids r where r.oid=p.identifier;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step8.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step8.sql
new file mode 100644
index 000000000..cc2864b2c
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step8.sql
@@ -0,0 +1,43 @@
+--TU DELFT
+
+DROP view IF EXISTS ${usageStatsDB}.tudelft_result_views_monthly_tmp;
+
+DROP view IF EXISTS ${usageStatsDB}.tudelft_result_downloads_monthly_tmp;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.tudelft_views_stats_tmp;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.tudelft_downloads_stats_tmp;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.tudelft_result_views_monthly_tmp
+AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct
+WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=252
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.tudelft_views_stats_tmp AS
+SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(views) AS count, max(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.tudelft_result_views_monthly_tmp p, ${statsDB}.datasource d,
+${statsDB}.result_oids ro WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201'
+GROUP BY d.id, ro.id, month ;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.tudelft_result_downloads_monthly_tmp
+AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct
+WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=252
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.tudelft_downloads_stats_tmp AS
+SELECT 'OpenAIRE' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(views) AS count, max(openaire_referrer) AS openaire FROM ${usageStatsDB}.tudelft_result_downloads_monthly_tmp p,
+${statsDB}.datasource d, ${statsDB}.result_oids ro
+WHERE concat('tud:',p.id)=ro.oid and d.id='opendoar____::c9892a989183de32e976c6f04e700201'
+GROUP BY d.id, ro.id, month;
+
+DROP view IF EXISTS ${usageStatsDB}.tudelft_result_views_monthly_tmp;
+
+DROP view IF EXISTS ${usageStatsDB}.tudelft_result_downloads_monthly_tmp;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step9.sql b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step9.sql
new file mode 100644
index 000000000..ff1e04d77
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/scripts/Step9.sql
@@ -0,0 +1,43 @@
+--B2SHARE
+
+DROP view IF EXISTS ${usageStatsDB}.b2share_result_views_monthly_tmp;
+
+DROP view IF EXISTS ${usageStatsDB}.b2share_result_downloads_monthly_tmp;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.b2share_views_stats_tmp;
+
+DROP TABLE IF EXISTS ${usageStatsDB}.b2share_downloads_stats_tmp;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.b2share_result_views_monthly_tmp
+AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct
+WHERE action='action' and (source_item_type='oaItem' or source_item_type='repItem') and source=412
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.b2share_views_stats_tmp AS
+SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(views) AS count, max(openaire_referrer) AS openaire
+FROM ${usageStatsDB}.b2share_result_views_monthly_tmp p, ${statsDB}.datasource d, ${statsDB}.result_oids ro
+WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877'
+GROUP BY d.id, ro.id, month;
+
+CREATE OR REPLACE VIEW ${usageStatsDB}.b2share_result_downloads_monthly_tmp
+AS SELECT entity_id, reflect('java.net.URLDecoder', 'decode', entity_id) AS id,
+COUNT(entity_id) as views, SUM(CASE WHEN referrer_name LIKE '%openaire%' THEN 1 ELSE 0 END) AS openaire_referrer,
+CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')) AS month, source
+FROM ${usageStatsDB}.piwiklogdistinct
+WHERE action='download' and (source_item_type='oaItem' or source_item_type='repItem') and source=412
+GROUP BY entity_id, CONCAT(YEAR(timestamp), '/', LPAD(MONTH(timestamp), 2, '0')), source;
+
+CREATE TABLE IF NOT EXISTS ${usageStatsDB}.b2share_downloads_stats_tmp AS
+SELECT 'B2SHARE' as source, d.id as repository_id, ro.id as result_id, month as date,
+max(views) AS count, max(openaire_referrer) AS openaire FROM ${usageStatsDB}.b2share_result_downloads_monthly_tmp p,
+${statsDB}.datasource d, ${statsDB}.result_oids ro
+WHERE p.id=ro.oid and d.id='re3data_____::ad3609c351bd520edf6f10f5e0d9b877'
+GROUP BY d.id, ro.id, month;
+
+DROP view IF EXISTS ${usageStatsDB}.b2share_result_views_monthly_tmp;
+
+DROP view IF EXISTS ${usageStatsDB}.b2share_result_downloads_monthly_tmp;
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/workflow.xml
new file mode 100755
index 000000000..fa62b1d07
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usagestatsupdate/oozie_app/workflow.xml
@@ -0,0 +1,307 @@
+
+
+
+ hiveMetastoreUris
+ Hive server metastore URIs
+
+
+ hiveJdbcUrl
+ Hive server jdbc url
+
+
+ impalaJdbcUrl
+ Impala server jdbc url
+
+
+ portalMatomoID
+ Matomo ID for OpenAIRE Explore
+
+
+ usageRawDataDB
+ Raw Usage Data DB
+
+
+ usageStatsDB
+ Usage Stats DB
+
+
+ statsDB
+ Stats DB
+
+
+ usagestatsPermanentDB
+ Permanent Usage Stats DB
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ hive.metastore.uris
+ ${hiveMetastoreUris}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+ spark.executor.memory
+ 19166291558
+
+
+ spark.yarn.executor.memoryOverhead
+ 3225
+
+
+ spark.driver.memory
+ 11596411699
+
+
+ spark.yarn.driver.memoryOverhead
+ 1228
+
+
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ usageRawDataDB=${usageRawDataDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+ portalMatomoID=${portalMatomoID}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+
+
+
+
+
+
+
+ eu.dnetlib.oa.graph.usagestatsupdate.export.ExecuteWorkflow
+ --dbHiveUrl${hiveJdbcUrl}
+ --dbImpalaUrl${impalaJdbcUrl}
+ --usageRawDataDBSchema${usageRawDataDB}
+ --usageStatsDBSchema${usageStatsDB}
+ --usagestatsPermanentDB${usagestatsPermanentDB}
+ --statsDBSchema${statsDB}
+
+
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ usageRawDataDB=${usageRawDataDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+ usageRawDataDB=${usageRawDataDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ usageRawDataDB=${usageRawDataDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+ usageRawDataDB=${usageRawDataDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ statsDB=${statsDB}
+ usageRawDataDB=${usageRawDataDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+
+
+
+
+
+
+
+ ${hiveJdbcUrl}
+
+ usageStatsDB=${usageStatsDB}
+ usagestatsPermanentDB=${usagestatsPermanentDB}
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ invalidate_metadata.sh
+ ${usagestatsPermanentDB}
+ invalidate_metadata.sh
+
+
+
+
+
+
+