diff --git a/dhp-workflows/dhp-usage-stats-update/pom.xml b/dhp-workflows/dhp-usage-stats-update/pom.xml
new file mode 100644
index 000000000..f85872fbd
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/pom.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<parent>
+		<artifactId>dhp-workflows</artifactId>
+		<groupId>eu.dnetlib.dhp</groupId>
+		<version>1.1.7-SNAPSHOT</version>
+	</parent>
+	<modelVersion>4.0.0</modelVersion>
+	<artifactId>dhp-usage-stats-update</artifactId>
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.spark</groupId>
+			<artifactId>spark-core_2.11</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.spark</groupId>
+			<artifactId>spark-sql_2.11</artifactId>
+		</dependency>
+	</dependencies>
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>pl.project13.maven</groupId>
+				<artifactId>git-commit-id-plugin</artifactId>
+				<version>2.1.11</version>
+				<configuration>
+					<failOnNoGitDirectory>false</failOnNoGitDirectory>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+</project>
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ConnectDB.java
new file mode 100644
index 000000000..d4b9e6786
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ConnectDB.java
@@ -0,0 +1,66 @@
+
+package eu.dnetlib.usagestats.export;
+
+/**
+ * @author dpie
+ */
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Properties;
+
+import org.apache.log4j.Logger;
+
+public abstract class ConnectDB {
+
+ private static Connection DB_CONNECTION;
+
+ private static String dbURL;
+ private static String dbUsername;
+ private static String dbPassword;
+ private static String defaultDBSchema;
+ private final static Logger log = Logger.getLogger(ConnectDB.class);
+
+ static void init(Properties properties) throws ClassNotFoundException {
+
+ dbURL = properties.getProperty("Stats_db_Url");
+ dbUsername = properties.getProperty("Stats_db_User");
+ dbPassword = properties.getProperty("Stats_db_Pass");
+ defaultDBSchema = properties.getProperty("Stats_db_Schema");
+
+ Class.forName(properties.getProperty("Stats_db_Driver"));
+ }
+
+ public static Connection getConnection() throws SQLException {
+ if (DB_CONNECTION != null && !DB_CONNECTION.isClosed()) {
+ return DB_CONNECTION;
+ } else {
+ DB_CONNECTION = connect();
+
+ return DB_CONNECTION;
+ }
+ }
+
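+	// Opening a connection pins the session's search_path to the configured stats
+	// schema, so the unqualified table names used by the exporter resolve there.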
+ private static Connection connect() throws SQLException {
+ Connection connection = DriverManager.getConnection(dbURL, dbUsername, dbPassword);
+ Statement stmt = connection.createStatement();
+ String sqlSetSearchPath = "SET search_path TO " + defaultDBSchema + ";";
+ stmt.executeUpdate(sqlSetSearchPath);
+ stmt.close();
+
+ log.debug("Opened database successfully");
+
+ return connection;
+ }
+
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ExecuteWorkflow.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ExecuteWorkflow.java
new file mode 100644
index 000000000..3e980c4bd
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ExecuteWorkflow.java
@@ -0,0 +1,43 @@
+
+package eu.dnetlib.usagestats.export;
+
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * @author dpie
+ */
+public class ExecuteWorkflow {
+
+	public static void main(String[] args) throws Exception {
+
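+		// Runtime configuration is read from a usagestats.properties file that must
+		// be available on the classpath and is handed to the exporter as-is.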
+ Properties prop = new Properties();
+ InputStream propertiesInputStream = UsageStatsExporter.class
+ .getClassLoader()
+ .getResourceAsStream("usagestats.properties");
+ prop.load(propertiesInputStream);
+
+ UsageStatsExporter usagestatsExport = new UsageStatsExporter(prop);
+ usagestatsExport.export();
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/IrusStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/IrusStats.java
new file mode 100644
index 000000000..8062ce428
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/IrusStats.java
@@ -0,0 +1,431 @@
+
+package eu.dnetlib.usagestats.export;
+
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+
+import org.apache.log4j.Logger;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+
+/**
+ * Created by dpie on 20/01/2020.
+ */
+public class IrusStats {
+
+ private String irusUKURL;
+
+ private final Logger log = Logger.getLogger(this.getClass());
+
+ public IrusStats(String irusUKURL) throws Exception {
+ this.irusUKURL = irusUKURL;
+ createTables();
+ createTmpTables();
+ }
+
+ private void createTables() throws Exception {
+ try {
+
+ Statement stmt = ConnectDB.getConnection().createStatement();
+ String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
+ stmt.executeUpdate(sqlCreateTableSushiLog);
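+			// A PostgreSQL rewrite rule turns INSERTs whose key already exists in
+			// sushilog into no-ops, so re-running the harvest is idempotent.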
+ String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ + " ON INSERT TO sushilog "
+ + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
+ + "sushilog.rid, sushilog.date "
+ + "FROM sushilog "
+ + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
+ stmt.executeUpdate(sqlcreateRuleSushiLog);
+ String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
+ stmt.executeUpdate(createSushiIndex);
+
+ stmt.close();
+ ConnectDB.getConnection().close();
+ log.info("Sushi Tables Created");
+ } catch (Exception e) {
+ log.error("Failed to create tables: " + e);
+ throw new Exception("Failed to create tables: " + e.toString(), e);
+ }
+ }
+
+ private void createTmpTables() throws Exception {
+ try {
+
+ Statement stmt = ConnectDB.getConnection().createStatement();
+ String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilogtmp(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
+ stmt.executeUpdate(sqlCreateTableSushiLog);
+
+ // stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;");
+ // String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;";
+ // stmt.executeUpdate(sqlCopyPublicSushiLog);
+ String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ + " ON INSERT TO sushilogtmp "
+ + " WHERE (EXISTS ( SELECT sushilogtmp.source, sushilogtmp.repository,"
+ + "sushilogtmp.rid, sushilogtmp.date "
+ + "FROM sushilogtmp "
+ + "WHERE sushilogtmp.source = new.source AND sushilogtmp.repository = new.repository AND sushilogtmp.rid = new.rid AND sushilogtmp.date = new.date AND sushilogtmp.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
+ stmt.executeUpdate(sqlcreateRuleSushiLog);
+
+ stmt.close();
+ ConnectDB.getConnection().close();
+ log.info("Sushi Tmp Tables Created");
+ } catch (Exception e) {
+ log.error("Failed to create tables: " + e);
+ throw new Exception("Failed to create tables: " + e.toString(), e);
+ }
+ }
+
+ public void irusStats() throws Exception {
+ Statement stmt = ConnectDB.getConnection().createStatement();
+ ConnectDB.getConnection().setAutoCommit(false);
+
+ // String sql = "INSERT INTO sushi_result_downloads SELECT s.source, d.id AS repository, ro.id, s.date, s.count
+ // FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND
+ // metric_type='ft_total'";
+ // String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date)
+ // ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count INTO
+ // downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND
+ // s.oai=ro.orid AND metric_type='ft_total'";
+ // String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id,
+ // extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0')
+ // as date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND
+ // s.oai=ro.orid AND metric_type='ft_total';";
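+		// Translate the harvested SUSHI rows into OpenAIRE ids (repository via
+		// datasource_oids, item via result_oids) and monthly year/month buckets.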
+ String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilogtmp s, public.datasource_oids d, public.result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total' AND s.source='IRUS-UK';";
+ stmt.executeUpdate(sql);
+
+ sql = "Insert into sushilog select * from sushilogtmp;";
+ stmt.executeUpdate(sql);
+
+ ConnectDB.getConnection().commit();
+ ConnectDB.getConnection().close();
+ }
+
+ public void processIrusRRReport() throws Exception {
+		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
+ // String reportUrl = "https://irus.jisc.ac.uk" +
+ // "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" +
+ // simpleDateFormat.format(new Date()) +
+ // "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
+ String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate="
+ + simpleDateFormat.format(new Date())
+ + "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
+
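+		// The RR1 report enumerates all repositories known to IRUS-UK; every OpenDOAR
+		// id found below is then harvested individually via processIrusIRReport().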
+ log.info("Getting Irus report: " + reportUrl);
+
+ String text = getJson(reportUrl, "", "");
+
+ log.info("Report: " + text);
+
+ JSONParser parser = new JSONParser();
+ JSONObject jsonObject = (JSONObject) parser.parse(text);
+ jsonObject = (JSONObject) jsonObject.get("ReportResponse");
+ jsonObject = (JSONObject) jsonObject.get("Report");
+ jsonObject = (JSONObject) jsonObject.get("Report");
+ jsonObject = (JSONObject) jsonObject.get("Customer");
+ JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
+ int i = 0;
+ for (Object aJsonArray : jsonArray) {
+ JSONObject jsonObjectRow = (JSONObject) aJsonArray;
+ JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
+ for (Object identifier : itemIdentifier) {
+ JSONObject opendoar = (JSONObject) identifier;
+ if (opendoar.get("Type").toString().equals("OpenDOAR")) {
+ // System.out.println(i + ": " + opendoar.get("Value").toString());
+ log.info(i + ": " + opendoar.get("Value").toString());
+ i++;
+ processIrusIRReport(opendoar.get("Value").toString());
+ break;
+ }
+ }
+ // break;
+ }
+ }
+
+ private void processIrusIRReport(String opendoar) throws Exception {
+		log.info("Processing IRUS stats for opendoar repository " + opendoar);
+ ConnectDB.getConnection().setAutoCommit(false);
+
+		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
+
+ Calendar start = Calendar.getInstance();
+ start.set(Calendar.YEAR, 2016);
+ start.set(Calendar.MONTH, Calendar.JANUARY);
+ // start.setTime(simpleDateFormat.parse("2016-01"));
+
+ Calendar end = Calendar.getInstance();
+ end.add(Calendar.DAY_OF_MONTH, -1);
+
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+ PreparedStatement st = ConnectDB
+ .getConnection()
+ .prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
+ st.setString(1, "opendoar____::" + opendoar);
+ ResultSet rs_date = st.executeQuery();
+ while (rs_date.next()) {
+ if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
+ && !rs_date.getString(1).equals("")) {
+ start.setTime(sdf.parse(rs_date.getString(1)));
+ }
+ }
+ rs_date.close();
+ PreparedStatement preparedStatement = ConnectDB
+ .getConnection()
+ .prepareStatement(
+ "INSERT INTO sushilogtmp (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
+ int batch_size = 0;
+
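+		// Walk month by month from the last harvested date, fetch one IR1 report per
+		// month, and batch the inserts into sushilogtmp, flushing every 10000 rows.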
+ while (start.before(end)) {
+ // log.info("date: " + simpleDateFormat.format(start.getTime()));
+ String reportUrl = this.irusUKURL + "GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate="
+ + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime())
+ + "&RepositoryIdentifier=opendoar%3A" + opendoar
+ + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
+ start.add(Calendar.MONTH, 1);
+
+ String text = getJson(reportUrl, "", "");
+ if (text == null) {
+ continue;
+ }
+
+ JSONParser parser = new JSONParser();
+ JSONObject jsonObject = (JSONObject) parser.parse(text);
+ jsonObject = (JSONObject) jsonObject.get("ReportResponse");
+ jsonObject = (JSONObject) jsonObject.get("Report");
+ jsonObject = (JSONObject) jsonObject.get("Report");
+ jsonObject = (JSONObject) jsonObject.get("Customer");
+ JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
+ if (jsonArray == null) {
+ continue;
+ }
+ String oai = "";
+ for (Object aJsonArray : jsonArray) {
+ JSONObject jsonObjectRow = (JSONObject) aJsonArray;
+ JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
+ for (Object identifier : itemIdentifier) {
+ JSONObject oaiPmh = (JSONObject) identifier;
+ if (oaiPmh.get("Type").toString().equals("OAI")) {
+ oai = oaiPmh.get("Value").toString();
+ // System.out.println("OAI: " + oai);
+ break;
+ }
+ }
+
+ JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
+ String period;
+ String type;
+ String count;
+ for (Object perf : itemPerformance) {
+ JSONObject performance = (JSONObject) perf;
+ JSONObject periodObj = (JSONObject) performance.get("Period");
+ period = periodObj.get("Begin").toString();
+ JSONObject instanceObj = (JSONObject) performance.get("Instance");
+ type = instanceObj.get("MetricType").toString();
+ count = instanceObj.get("Count").toString();
+ // System.out.println(oai + " : " + period + " : " + count);
+
+ preparedStatement.setString(1, "IRUS-UK");
+ preparedStatement.setString(2, "opendoar____::" + opendoar);
+ preparedStatement.setString(3, oai);
+ preparedStatement.setString(4, period);
+ preparedStatement.setString(5, type);
+ preparedStatement.setInt(6, Integer.parseInt(count));
+ preparedStatement.addBatch();
+ batch_size++;
+ if (batch_size == 10000) {
+ preparedStatement.executeBatch();
+ ConnectDB.getConnection().commit();
+ batch_size = 0;
+ }
+ }
+ // break;
+ }
+ // break;
+ }
+
+ preparedStatement.executeBatch();
+ ConnectDB.getConnection().commit();
+ ConnectDB.getConnection().close();
+ }
+
+ public void processIrusIRReport(String opendoar, String startDate) throws Exception {
+ ConnectDB.getConnection().setAutoCommit(false);
+
+		SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
+
+ Calendar start = Calendar.getInstance();
+ start.set(Calendar.YEAR, 2016);
+ start.set(Calendar.MONTH, Calendar.JANUARY);
+ // start.setTime(simpleDateFormat.parse("2016-01"));
+
+ Calendar end = Calendar.getInstance();
+ end.add(Calendar.DAY_OF_MONTH, -1);
+
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+ start.setTime(sdf.parse(startDate));
+
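+		// Unlike the single-argument variant, this overload writes into a dedicated
+		// per-repository table (shadow.sushilog<opendoar>) that is created on the fly.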
+ String createTablesQuery = "-- Table: shadow.sushilog" + opendoar + "\n"
+ + "\n"
+ + "-- DROP TABLE shadow.sushilog" + opendoar + ";\n"
+ + "\n"
+ + "CREATE TABLE shadow.sushilog" + opendoar + "\n"
+ + "(\n"
+ + " source text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ + " repository text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ + " rid text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ + " date text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ + " metric_type text COLLATE pg_catalog.\"default\" NOT NULL,\n"
+ + " count integer,\n"
+ + " CONSTRAINT sushilog" + opendoar + "_pkey PRIMARY KEY (source, repository, rid, date, metric_type)\n"
+ + " USING INDEX TABLESPACE index_storage\n"
+ + ")\n"
+ + "\n"
+ + "TABLESPACE pg_default;\n"
+ + "\n"
+ + "ALTER TABLE shadow.sushilog" + opendoar + "\n"
+ + " OWNER to sqoop;\n"
+ + "\n"
+ + "-- Rule: ignore_duplicate_inserts ON shadow.sushilog" + opendoar + "\n"
+ + "\n"
+ + "-- DROP Rule ignore_duplicate_inserts ON shadow.sushilog" + opendoar + ";\n"
+ + "\n"
+ + "CREATE OR REPLACE RULE ignore_duplicate_inserts AS\n"
+ + " ON INSERT TO shadow.sushilog" + opendoar + "\n"
+ + " WHERE (EXISTS ( SELECT sushilog" + opendoar + ".source,\n"
+ + " sushilog" + opendoar + ".repository,\n"
+ + " sushilog" + opendoar + ".rid,\n"
+ + " sushilog" + opendoar + ".date\n"
+ + " FROM sushilog" + opendoar + "\n"
+ + " WHERE sushilog" + opendoar + ".source = new.source AND sushilog" + opendoar
+ + ".repository = new.repository AND sushilog" + opendoar + ".rid = new.rid AND sushilog" + opendoar
+ + ".date = new.date AND sushilog" + opendoar + ".metric_type = new.metric_type))\n"
+ + " DO INSTEAD\n"
+ + "NOTHING;";
+
+ Statement stCreateTables = ConnectDB.getConnection().createStatement();
+ stCreateTables.execute(createTablesQuery);
+ ConnectDB.getConnection().commit();
+
+ PreparedStatement preparedStatement = ConnectDB
+ .getConnection()
+ .prepareStatement(
+ "INSERT INTO sushilog" + opendoar
+ + " (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
+ int batch_size = 0;
+
+ while (start.before(end)) {
+ // log.info("date: " + simpleDateFormat.format(start.getTime()));
+ String reportUrl = "https://irus.jisc.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate="
+ + simpleDateFormat.format(start.getTime()) + "&EndDate=2019-10-31&RepositoryIdentifier=opendoar%3A"
+ + opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
+ start.add(Calendar.MONTH, 1);
+
+ String text = getJson(reportUrl, "", "");
+ if (text == null) {
+ continue;
+ }
+
+ JSONParser parser = new JSONParser();
+ JSONObject jsonObject = (JSONObject) parser.parse(text);
+ jsonObject = (JSONObject) jsonObject.get("ReportResponse");
+ jsonObject = (JSONObject) jsonObject.get("Report");
+ jsonObject = (JSONObject) jsonObject.get("Report");
+ jsonObject = (JSONObject) jsonObject.get("Customer");
+ JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
+ if (jsonArray == null) {
+ continue;
+ }
+ String oai = "";
+ for (Object aJsonArray : jsonArray) {
+ JSONObject jsonObjectRow = (JSONObject) aJsonArray;
+ JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
+ for (Object identifier : itemIdentifier) {
+ JSONObject oaiPmh = (JSONObject) identifier;
+ if (oaiPmh.get("Type").toString().equals("OAI")) {
+ oai = oaiPmh.get("Value").toString();
+ // System.out.println("OAI: " + oai);
+ break;
+ }
+ }
+
+ JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
+ String period;
+ String type;
+ String count;
+ for (Object perf : itemPerformance) {
+ JSONObject performance = (JSONObject) perf;
+ JSONObject periodObj = (JSONObject) performance.get("Period");
+ period = periodObj.get("Begin").toString();
+ JSONObject instanceObj = (JSONObject) performance.get("Instance");
+ type = instanceObj.get("MetricType").toString();
+ count = instanceObj.get("Count").toString();
+ // System.out.println(oai + " : " + period + " : " + count);
+
+ preparedStatement.setString(1, "IRUS-UK");
+ preparedStatement.setString(2, "opendoar____::" + opendoar);
+ preparedStatement.setString(3, oai);
+ preparedStatement.setString(4, period);
+ preparedStatement.setString(5, type);
+ preparedStatement.setInt(6, Integer.parseInt(count));
+ preparedStatement.addBatch();
+ batch_size++;
+ if (batch_size == 10000) {
+ preparedStatement.executeBatch();
+ ConnectDB.getConnection().commit();
+ batch_size = 0;
+ }
+ }
+ // break;
+ }
+ // break;
+ }
+
+ preparedStatement.executeBatch();
+ ConnectDB.getConnection().commit();
+ ConnectDB.getConnection().close();
+ }
+
+ private String getJson(String url, String username, String password) throws Exception {
+ // String cred=username+":"+password;
+ // String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
+ try {
+ URL website = new URL(url);
+ URLConnection connection = website.openConnection();
+ // connection.setRequestProperty ("Authorization", "Basic "+encoded);
+ StringBuilder response;
+ try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
+ response = new StringBuilder();
+ String inputLine;
+ while ((inputLine = in.readLine()) != null) {
+ response.append(inputLine);
+ response.append("\n");
+ }
+ }
+ return response.toString();
+ } catch (Exception e) {
+ log.error("Failed to get URL", e);
+ return null;
+ }
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikDownloadLogs.java
new file mode 100644
index 000000000..ab6645c3e
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikDownloadLogs.java
@@ -0,0 +1,132 @@
+
+package eu.dnetlib.usagestats.export;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.log4j.Logger;
+
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Calendar;
+
+public class PiwikDownloadLogs {
+
+ private final String piwikUrl;
+ private Date startDate;
+ private final String tokenAuth;
+
+ /*
+ * The Piwik's API method
+ */
+ private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
+ private final String format = "&format=json";
+
+ private final Logger log = Logger.getLogger(this.getClass());
+
+ public PiwikDownloadLogs(String piwikUrl, String tokenAuth) {
+ this.piwikUrl = piwikUrl;
+ this.tokenAuth = tokenAuth;
+
+ }
+
+ private String getPiwikLogUrl() {
+ return "https://" + piwikUrl + "/";
+ }
+
+ private String getJson(String url) throws Exception {
+ try {
+ URL website = new URL(url);
+ URLConnection connection = website.openConnection();
+
+ // connection.setRequestProperty ("Authorization", "Basic "+encoded);
+ StringBuilder response;
+ try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
+ response = new StringBuilder();
+ String inputLine;
+ while ((inputLine = in.readLine()) != null) {
+ response.append(inputLine);
+ response.append("\n");
+ }
+ }
+ return response.toString();
+ } catch (Exception e) {
+ log.error("Failed to get URL: " + e);
+ throw new Exception("Failed to get URL: " + e.toString(), e);
+ }
+ }
+
+ public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
+
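+		// For every datasource that has a Matomo site id, resume from the newest
+		// timestamp already present in piwiklog and download the missing daily
+		// visit logs from the Matomo API into HDFS, one JSON file per site and day.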
+ Statement statement = ConnectDB.getConnection().createStatement();
+
+ ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
+ while (rs.next()) {
+ int siteId = rs.getInt(1);
+			SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
+
+ Calendar start = Calendar.getInstance();
+ start.set(Calendar.YEAR, 2016);
+ start.set(Calendar.MONTH, Calendar.MARCH);
+ //start.setTime(simpleDateFormat.parse("2016-01"));
+
+ Calendar end = Calendar.getInstance();
+ end.add(Calendar.DAY_OF_MONTH, -1);
+
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+			PreparedStatement st = ConnectDB.getConnection().prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
+ st.setInt(1, siteId);
+
+ ResultSet rs_date = st.executeQuery();
+ while (rs_date.next()) {
+ if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
+ start.setTime(sdf.parse(rs_date.getString(1)));
+ }
+ }
+ rs_date.close();
+
+ for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+ log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
+
+ String period = "&period=day&date=" + sdf.format(date);
+ String outFolder = "";
+ //portal siteId = 109;
+ if (siteId == Integer.parseInt(portalMatomoID)) {
+ outFolder = portalLogPath;
+ } else {
+ outFolder = repoLogsPath;
+ }
+ FileSystem fs = FileSystem.get(new Configuration());
+ FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
+
+ String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
+ String content = "";
+
+ int i = 0;
+
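+				// Live.getLastVisitsDetails is paged: keep requesting 1000-visit pages
+				// through filter_offset until Matomo returns an empty JSON array.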
+ while (!content.equals("[]\n")) {
+ String apiUrl = baseApiUrl;
+
+ if (i > 0) {
+ apiUrl += "&filter_offset=" + (i * 1000);
+ }
+
+ content = getJson(apiUrl);
+
+ fin.write(content.getBytes());
+
+ i++;
+ }
+ fin.close();
+
+ }
+
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikStatsDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikStatsDB.java
new file mode 100644
index 000000000..e4e706745
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikStatsDB.java
@@ -0,0 +1,1022 @@
+
+package eu.dnetlib.usagestats.export;
+
+import java.io.*;
+import java.net.URLDecoder;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Timestamp;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.log4j.Logger;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+
+public class PiwikStatsDB {
+
+ private String logPath;
+ private String logRepoPath;
+ private String logPortalPath;
+
+ private Statement stmt = null;
+
+ private final Logger log = Logger.getLogger(this.getClass());
+ private String CounterRobotsURL;
+	private ArrayList<String> robotsList;
+
+ public PiwikStatsDB(String logRepoPath, String logPortalPath) throws Exception {
+ this.logRepoPath = logRepoPath;
+ this.logPortalPath = logPortalPath;
+ this.createTables();
+ this.createTmpTables();
+ }
+
+ public void foo() {
+		Stream<String> s = Arrays.stream(new String[] {
+ "a", "b", "c", "d"
+ });
+
+ System.out.println(s.parallel().count());
+ }
+
+	public ArrayList<String> getRobotsList() {
+		return robotsList;
+	}
+
+	public void setRobotsList(ArrayList<String> robotsList) {
+ this.robotsList = robotsList;
+ }
+
+ public String getCounterRobotsURL() {
+ return CounterRobotsURL;
+ }
+
+ public void setCounterRobotsURL(String CounterRobotsURL) {
+ this.CounterRobotsURL = CounterRobotsURL;
+ }
+
+ private void createTables() throws Exception {
+ try {
+ stmt = ConnectDB.getConnection().createStatement();
+ String sqlCreateTablePiwikLog = "CREATE TABLE IF NOT EXISTS piwiklog(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
+ String sqlcreateRulePiwikLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ + " ON INSERT TO piwiklog "
+ + " WHERE (EXISTS ( SELECT piwiklog.source, piwiklog.id_visit,"
+ + "piwiklog.action, piwiklog.\"timestamp\", piwiklog.entity_id "
+ + "FROM piwiklog "
+ + "WHERE piwiklog.source = new.source AND piwiklog.id_visit = new.id_visit AND piwiklog.action = new.action AND piwiklog.entity_id = new.entity_id AND piwiklog.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+ String sqlCreateRuleIndexPiwikLog = "create index if not exists piwiklog_rule on piwiklog(source, id_visit, action, entity_id, \"timestamp\");";
+ stmt.executeUpdate(sqlCreateTablePiwikLog);
+ stmt.executeUpdate(sqlcreateRulePiwikLog);
+ stmt.executeUpdate(sqlCreateRuleIndexPiwikLog);
+
+ String sqlCreateTablePortalLog = "CREATE TABLE IF NOT EXISTS process_portal_log(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, timestamp));";
+ String sqlcreateRulePortalLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ + " ON INSERT TO process_portal_log "
+ + " WHERE (EXISTS ( SELECT process_portal_log.source, process_portal_log.id_visit,"
+ + "process_portal_log.\"timestamp\" "
+ + "FROM process_portal_log "
+ + "WHERE process_portal_log.source = new.source AND process_portal_log.id_visit = new.id_visit AND process_portal_log.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+ String sqlCreateRuleIndexPortalLog = "create index if not exists process_portal_log_rule on process_portal_log(source, id_visit, \"timestamp\");";
+ stmt.executeUpdate(sqlCreateTablePortalLog);
+ stmt.executeUpdate(sqlcreateRulePortalLog);
+			stmt.executeUpdate(sqlCreateRuleIndexPortalLog);
+
+ stmt.close();
+ ConnectDB.getConnection().close();
+ log.info("Usage Tables Created");
+
+ } catch (Exception e) {
+ log.error("Failed to create tables: " + e);
+ throw new Exception("Failed to create tables: " + e.toString(), e);
+ }
+ }
+
+ private void createTmpTables() throws Exception {
+ try {
+ Statement stmt = ConnectDB.getConnection().createStatement();
+ String sqlCreateTmpTablePiwikLog = "CREATE TABLE IF NOT EXISTS piwiklogtmp(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, action, timestamp, entity_id));";
+ String sqlcreateTmpRulePiwikLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ + " ON INSERT TO piwiklogtmp "
+ + " WHERE (EXISTS ( SELECT piwiklogtmp.source, piwiklogtmp.id_visit,"
+ + "piwiklogtmp.action, piwiklogtmp.\"timestamp\", piwiklogtmp.entity_id "
+ + "FROM piwiklogtmp "
+ + "WHERE piwiklogtmp.source = new.source AND piwiklogtmp.id_visit = new.id_visit AND piwiklogtmp.action = new.action AND piwiklogtmp.entity_id = new.entity_id AND piwiklogtmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+ stmt.executeUpdate(sqlCreateTmpTablePiwikLog);
+ stmt.executeUpdate(sqlcreateTmpRulePiwikLog);
+
+ // String sqlCopyPublicPiwiklog="insert into piwiklog select * from public.piwiklog;";
+ // stmt.executeUpdate(sqlCopyPublicPiwiklog);
+ String sqlCreateTmpTablePortalLog = "CREATE TABLE IF NOT EXISTS process_portal_log_tmp(source INTEGER, id_visit TEXT, country TEXT, action TEXT, url TEXT, entity_id TEXT, source_item_type TEXT, timestamp TEXT, referrer_name TEXT, agent TEXT, PRIMARY KEY(source, id_visit, timestamp));";
+ String sqlcreateTmpRulePortalLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ + " ON INSERT TO process_portal_log_tmp "
+ + " WHERE (EXISTS ( SELECT process_portal_log_tmp.source, process_portal_log_tmp.id_visit,"
+ + "process_portal_log_tmp.\"timestamp\" "
+ + "FROM process_portal_log_tmp "
+ + "WHERE process_portal_log_tmp.source = new.source AND process_portal_log_tmp.id_visit = new.id_visit AND process_portal_log_tmp.\"timestamp\" = new.\"timestamp\")) DO INSTEAD NOTHING;";
+ stmt.executeUpdate(sqlCreateTmpTablePortalLog);
+ stmt.executeUpdate(sqlcreateTmpRulePortalLog);
+
+ stmt.close();
+ log.info("Usage Tmp Tables Created");
+
+ } catch (Exception e) {
+ log.error("Failed to create tmptables: " + e);
+ throw new Exception("Failed to create tmp tables: " + e.toString(), e);
+ // System.exit(0);
+ }
+ }
+
+ public void processLogs() throws Exception {
+ try {
+ ReadCounterRobotsList counterRobots = new ReadCounterRobotsList(this.getCounterRobotsURL());
+ this.robotsList = counterRobots.getRobotsPatterns();
+
+ processRepositoryLog();
+ log.info("repository process done");
+ removeDoubleClicks();
+ log.info("removing double clicks done");
+ cleanOAI();
+ log.info("cleaning oai done");
+
+ viewsStats();
+ downloadsStats();
+
+ processPortalLog();
+ log.info("portal process done");
+
+ portalStats();
+ log.info("portal usagestats done");
+
+ updateProdTables();
+ log.info("updateProdTables done");
+
+ } catch (Exception e) {
+ log.error("Failed to process logs: " + e);
+ throw new Exception("Failed to process logs: " + e.toString(), e);
+ }
+ }
+
+// public void usageStats() throws Exception {
+// try {
+// viewsStats();
+// downloadsStats();
+// log.info("stat tables and views done");
+// } catch (Exception e) {
+// log.error("Failed to create usage usagestats: " + e);
+// throw new Exception("Failed to create usage usagestats: " + e.toString(), e);
+// }
+// }
+
+ public void processRepositoryLog() throws Exception {
+ Statement stmt = ConnectDB.getConnection().createStatement();
+ ConnectDB.getConnection().setAutoCommit(false);
+
+		ArrayList<String> jsonFiles = listHdfsDir(this.logRepoPath);
+// File dir = new File(this.logRepoPath);
+// File[] jsonFiles = dir.listFiles();
+
+ PreparedStatement prepStatem = ConnectDB
+ .getConnection()
+ .prepareStatement(
+ "INSERT INTO piwiklogtmp (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
+ int batch_size = 0;
+ JSONParser parser = new JSONParser();
+ for (String jsonFile : jsonFiles) {
+			log.info("Processing log file " + jsonFile);
+ JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
+ for (Object aJsonArray : jsonArray) {
+ JSONObject jsonObjectRow = (JSONObject) aJsonArray;
+ int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
+ String idVisit = jsonObjectRow.get("idVisit").toString();
+ String country = jsonObjectRow.get("country").toString();
+ String referrerName = jsonObjectRow.get("referrerName").toString();
+ String agent = jsonObjectRow.get("browser").toString();
+ boolean botFound = false;
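+				// Drop visits whose user agent matches any COUNTER robots/crawlers pattern.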
+				Iterator<String> it = robotsList.iterator();
+ while (it.hasNext()) {
+ // Create a Pattern object
+ Pattern r = Pattern.compile(it.next().toString());
+ // Now create matcher object.
+ Matcher m = r.matcher(agent);
+ if (m.find()) {
+ // System.out.println("Found value: " + m.group(0));
+ botFound = true;
+ break;
+ }
+ }
+				if (!botFound) {
+ String sourceItemType = "repItem";
+
+ JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
+ for (Object actionDetail : actionDetails) {
+ JSONObject actionDetailsObj = (JSONObject) actionDetail;
+
+ if (actionDetailsObj.get("customVariables") != null) {
+ SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Timestamp timestamp = new Timestamp(
+ Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
+ String url = actionDetailsObj.get("url").toString();
+ String oaipmh = ((JSONObject) ((JSONObject) actionDetailsObj.get("customVariables"))
+ .get("1")).get("customVariablePageValue1").toString();
+ String action = actionDetailsObj.get("type").toString();
+
+ prepStatem.setInt(1, idSite);
+ prepStatem.setString(2, idVisit);
+ prepStatem.setString(3, country);
+ prepStatem.setString(4, action);
+ prepStatem.setString(5, url);
+ prepStatem.setString(6, oaipmh);
+ prepStatem.setString(7, sourceItemType);
+ prepStatem.setString(8, simpleDateFormat.format(timestamp));
+ prepStatem.setString(9, referrerName);
+ prepStatem.setString(10, agent);
+ prepStatem.addBatch();
+ batch_size++;
+ if (batch_size == 10000) {
+ prepStatem.executeBatch();
+ ConnectDB.getConnection().commit();
+ batch_size = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+ prepStatem.executeBatch();
+ ConnectDB.getConnection().commit();
+ stmt.close();
+ }
+
+ public void removeDoubleClicks() throws Exception {
+ Statement stmt = ConnectDB.getConnection().createStatement();
+ ConnectDB.getConnection().setAutoCommit(false);
+
+ // clean download double clicks
+ String sql = "DELETE FROM piwiklogtmp p WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM piwiklogtmp p1, piwiklogtmp p2 WHERE p1.source!='5' AND p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp AND p1.timestamp jsonFiles = listHdfsDir(this.logPortalPath);
+// File folder = new File(this.logPortalPath);
+// File[] jsonFiles = folder.listFiles();
+
+ PreparedStatement prepStatem = ConnectDB
+ .getConnection()
+ .prepareStatement(
+ "INSERT INTO process_portal_log_tmp (source, id_visit, country, action, url, entity_id, source_item_type, timestamp, referrer_name, agent) VALUES (?,?,?,?,?,?,?,?,?,?)");
+ int batch_size = 0;
+ JSONParser parser = new JSONParser();
+ for (String jsonFile : jsonFiles) {
+ JSONArray jsonArray = (JSONArray) parser.parse(readHDFSFile(jsonFile));
+
+ for (Object aJsonArray : jsonArray) {
+ JSONObject jsonObjectRow = (JSONObject) aJsonArray;
+ int idSite = Integer.parseInt(jsonObjectRow.get("idSite").toString());
+ String idVisit = jsonObjectRow.get("idVisit").toString();
+ String country = jsonObjectRow.get("country").toString();
+ String referrerName = jsonObjectRow.get("referrerName").toString();
+ String agent = jsonObjectRow.get("browser").toString();
+ boolean botFound = false;
+				Iterator<String> it = robotsList.iterator();
+ while (it.hasNext()) {
+ // Create a Pattern object
+ Pattern r = Pattern.compile(it.next().toString());
+ // Now create matcher object.
+ Matcher m = r.matcher(agent);
+ if (m.find()) {
+ botFound = true;
+ break;
+ }
+ }
+				if (!botFound) {
+ JSONArray actionDetails = (JSONArray) jsonObjectRow.get(("actionDetails"));
+ for (Object actionDetail : actionDetails) {
+ JSONObject actionDetailsObj = (JSONObject) actionDetail;
+
+ SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Timestamp timestamp = new Timestamp(
+ Long.parseLong(actionDetailsObj.get("timestamp").toString()) * 1000);
+
+ String action = actionDetailsObj.get("type").toString();
+ String url = actionDetailsObj.get("url").toString();
+
+ String entityID = processPortalURL(url);
+ String sourceItemType = "";
+
+ if (entityID.indexOf("|") > 0) {
+ sourceItemType = entityID.substring(0, entityID.indexOf("|"));
+ entityID = entityID.substring(entityID.indexOf("|") + 1);
+ }
+
+ prepStatem.setInt(1, idSite);
+ prepStatem.setString(2, idVisit);
+ prepStatem.setString(3, country);
+ prepStatem.setString(4, action);
+ prepStatem.setString(5, url);
+ prepStatem.setString(6, entityID);
+ prepStatem.setString(7, sourceItemType);
+ prepStatem.setString(8, simpleDateFormat.format(timestamp));
+ prepStatem.setString(9, referrerName);
+ prepStatem.setString(10, agent);
+
+ prepStatem.addBatch();
+ batch_size++;
+ if (batch_size == 10000) {
+ prepStatem.executeBatch();
+ ConnectDB.getConnection().commit();
+ batch_size = 0;
+ }
+ }
+ }
+ }
+ }
+ prepStatem.executeBatch();
+ ConnectDB.getConnection().commit();
+
+ stmt.close();
+ ConnectDB.getConnection().close();
+ }
+
+ public void portalStats() throws SQLException {
+ Connection con = ConnectDB.getConnection();
+ Statement stmt = con.createStatement();
+ con.setAutoCommit(false);
+
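+		// Reclassify portal rows into piwiklogtmp by resolving entity_id against the
+		// oid mapping of each entity type: result, datasource, organization, project.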
+ String sql = "INSERT INTO piwiklogtmp SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'oaItem\', timestamp, referrer_name, agent FROM process_portal_log_tmp, public.result_oids roid WHERE entity_id IS NOT null AND entity_id=roid.orid AND roid.orid IS NOT null;";
+ stmt.executeUpdate(sql);
+ stmt.close();
+// con.commit();
+
+ stmt = con.createStatement();
+ sql = "INSERT INTO piwiklogtmp SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'datasource\', timestamp, referrer_name, agent FROM process_portal_log_tmp, public.datasource_oids roid WHERE entity_id IS NOT null AND entity_id=roid.orid AND roid.orid IS NOT null;";
+ stmt.executeUpdate(sql);
+ stmt.close();
+// con.commit();
+
+ stmt = con.createStatement();
+ sql = "INSERT INTO piwiklogtmp SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'organization\', timestamp, referrer_name, agent FROM process_portal_log_tmp, public.organization_oids roid WHERE entity_id IS NOT null AND entity_id=roid.orid AND roid.orid IS NOT null;";
+ stmt.executeUpdate(sql);
+ stmt.close();
+// con.commit();
+
+ stmt = con.createStatement();
+ sql = "INSERT INTO piwiklogtmp SELECT DISTINCT source, id_visit, country, action, url, roid.orid, \'project\', timestamp, referrer_name, agent FROM process_portal_log_tmp, public.project_oids roid WHERE entity_id IS NOT null AND entity_id=roid.orid AND roid.orid IS NOT null;";
+ stmt.executeUpdate(sql);
+ stmt.close();
+// con.commit();
+
+ con.close();
+ }
+
+ private void cleanOAI() throws Exception {
+ ConnectDB.getConnection().setAutoCommit(false);
+
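+		// Normalize OAI identifiers recorded with a '/' separator (oai:host/id) to the
+		// canonical ':' form (oai:host:id) for a fixed list of .pt repositories.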
+ stmt = ConnectDB.getConnection().createStatement();
+ String sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chlc.min-saude.pt/','oai:repositorio.chlc.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.chlc.min-saude.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hospitaldebraga.pt/','oai:repositorio.hospitaldebraga.pt:') WHERE entity_id LIKE 'oai:repositorio.hospitaldebraga.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipl.pt/','oai:repositorio.ipl.pt:') WHERE entity_id LIKE 'oai:repositorio.ipl.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:bibliotecadigital.ipb.pt/','oai:bibliotecadigital.ipb.pt:') WHERE entity_id LIKE 'oai:bibliotecadigital.ipb.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ismai.pt/','oai:repositorio.ismai.pt:') WHERE entity_id LIKE 'oai:repositorio.ismai.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklog SET entity_id = regexp_replace(entity_id, '^oai:repositorioaberto.uab.pt/','oai:repositorioaberto.uab.pt:') WHERE entity_id LIKE 'oai:repositorioaberto.uab.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.uac.pt/','oai:repositorio.uac.pt:') WHERE entity_id LIKE 'oai:repositorio.uac.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.insa.pt/','oai:repositorio.insa.pt:') WHERE entity_id LIKE 'oai:repositorio.insa.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipcb.pt/','oai:repositorio.ipcb.pt:') WHERE entity_id LIKE 'oai:repositorio.ipcb.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ispa.pt/','oai:repositorio.ispa.pt:') WHERE entity_id LIKE 'oai:repositorio.ispa.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.chporto.pt/','oai:repositorio.chporto.pt:') WHERE entity_id LIKE 'oai:repositorio.chporto.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ucp.pt/','oai:repositorio.ucp.pt:') WHERE entity_id LIKE 'oai:repositorio.ucp.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:rihuc.huc.min-saude.pt/','oai:rihuc.huc.min-saude.pt:') WHERE entity_id LIKE 'oai:rihuc.huc.min-saude.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipv.pt/','oai:repositorio.ipv.pt:') WHERE entity_id LIKE 'oai:repositorio.ipv.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:www.repository.utl.pt/','oai:www.repository.utl.pt:') WHERE entity_id LIKE 'oai:www.repository.utl.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:run.unl.pt/','oai:run.unl.pt:') WHERE entity_id LIKE 'oai:run.unl.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:sapientia.ualg.pt/','oai:sapientia.ualg.pt:') WHERE entity_id LIKE 'oai:sapientia.ualg.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ipsantarem.pt/','oai:repositorio.ipsantarem.pt:') WHERE entity_id LIKE 'oai:repositorio.ipsantarem.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:arca.igc.gulbenkian.pt/','oai:arca.igc.gulbenkian.pt:') WHERE entity_id LIKE 'oai:arca.igc.gulbenkian.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:ubibliorum.ubi.pt/','oai:ubibliorum.ubi.pt:') WHERE entity_id LIKE 'oai:ubibliorum.ubi.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:digituma.uma.pt/','oai:digituma.uma.pt:') WHERE entity_id LIKE 'oai:digituma.uma.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.ul.pt/','oai:repositorio.ul.pt:') WHERE entity_id LIKE 'oai:repositorio.ul.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.hff.min-saude.pt/','oai:repositorio.hff.min-saude.pt:') WHERE entity_id LIKE 'oai:repositorio.hff.min-saude.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorium.sdum.uminho.pt/','oai:repositorium.sdum.uminho.pt:') WHERE entity_id LIKE 'oai:repositorium.sdum.uminho.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:recipp.ipp.pt/','oai:recipp.ipp.pt:') WHERE entity_id LIKE 'oai:recipp.ipp.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:bdigital.ufp.pt/','oai:bdigital.ufp.pt:') WHERE entity_id LIKE 'oai:bdigital.ufp.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:repositorio.lneg.pt/','oai:repositorio.lneg.pt:') WHERE entity_id LIKE 'oai:repositorio.lneg.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:iconline.ipleiria.pt/','oai:iconline.ipleiria.pt:') WHERE entity_id LIKE 'oai:iconline.ipleiria.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ stmt = ConnectDB.getConnection().createStatement();
+ sql = "UPDATE piwiklogtmp SET entity_id = regexp_replace(entity_id, '^oai:comum.rcaap.pt/','oai:comum.rcaap.pt:') WHERE entity_id LIKE 'oai:comum.rcaap.pt/%';";
+ stmt.executeUpdate(sql);
+ stmt.close();
+ ConnectDB.getConnection().commit();
+
+ ConnectDB.getConnection().close();
+ }
+
+ private String processPortalURL(String url) {
+
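+		// OpenAIRE object identifiers are 46 characters long: extract the id following
+		// each known query parameter and prefix it with the entity type it denotes.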
+ if (url.indexOf("explore.openaire.eu") > 0) {
+ try {
+ url = URLDecoder.decode(url, "UTF-8");
+ } catch (Exception e) {
+ log.info(url);
+ }
+ if (url.indexOf("datasourceId=") > 0 && url.substring(url.indexOf("datasourceId=") + 13).length() >= 46) {
+ url = "datasource|"
+ + url.substring(url.indexOf("datasourceId=") + 13, url.indexOf("datasourceId=") + 59);
+ } else if (url.indexOf("datasource=") > 0
+ && url.substring(url.indexOf("datasource=") + 11).length() >= 46) {
+ url = "datasource|" + url.substring(url.indexOf("datasource=") + 11, url.indexOf("datasource=") + 57);
+ } else if (url.indexOf("datasourceFilter=") > 0
+ && url.substring(url.indexOf("datasourceFilter=") + 17).length() >= 46) {
+ url = "datasource|"
+ + url.substring(url.indexOf("datasourceFilter=") + 17, url.indexOf("datasourceFilter=") + 63);
+ } else if (url.indexOf("articleId=") > 0 && url.substring(url.indexOf("articleId=") + 10).length() >= 46) {
+ url = "result|" + url.substring(url.indexOf("articleId=") + 10, url.indexOf("articleId=") + 56);
+ } else if (url.indexOf("datasetId=") > 0 && url.substring(url.indexOf("datasetId=") + 10).length() >= 46) {
+ url = "result|" + url.substring(url.indexOf("datasetId=") + 10, url.indexOf("datasetId=") + 56);
+ } else if (url.indexOf("projectId=") > 0 && url.substring(url.indexOf("projectId=") + 10).length() >= 46
+ && !url.contains("oai:dnet:corda")) {
+ url = "project|" + url.substring(url.indexOf("projectId=") + 10, url.indexOf("projectId=") + 56);
+ } else if (url.indexOf("organizationId=") > 0
+ && url.substring(url.indexOf("organizationId=") + 15).length() >= 46) {
+ url = "organization|"
+ + url.substring(url.indexOf("organizationId=") + 15, url.indexOf("organizationId=") + 61);
+ } else {
+ url = "";
+ }
+ } else {
+ url = "";
+ }
+
+ return url;
+ }
+
+ private void updateProdTables() throws SQLException {
+ Statement stmt = ConnectDB.getConnection().createStatement();
+ ConnectDB.getConnection().setAutoCommit(false);
+
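+		// Promote this run's tmp tables into the production tables, then drop the
+		// tmp tables so the next run starts from a clean slate.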
+ String sql = "insert into piwiklog select * from piwiklogtmp;";
+ stmt.executeUpdate(sql);
+
+ sql = "insert into views_stats select * from views_stats_tmp;";
+ stmt.executeUpdate(sql);
+
+ sql = "insert into downloads_stats select * from downloads_stats_tmp;";
+ stmt.executeUpdate(sql);
+
+ sql = "insert into pageviews_stats select * from pageviews_stats_tmp;";
+ stmt.executeUpdate(sql);
+
+ sql = "DROP TABLE IF EXISTS views_stats_tmp;";
+ stmt.executeUpdate(sql);
+
+ sql = "DROP TABLE IF EXISTS downloads_stats_tmp;";
+ stmt.executeUpdate(sql);
+
+ sql = "DROP TABLE IF EXISTS pageviews_stats_tmp;";
+ stmt.executeUpdate(sql);
+
+ sql = "DROP TABLE IF EXISTS process_portal_log_tmp;";
+ stmt.executeUpdate(sql);
+
+ stmt.close();
+ ConnectDB.getConnection().commit();
+ ConnectDB.getConnection().close();
+
+ log.info("updateProdTables done");
+ }
+
+	private ArrayList<String> listHdfsDir(String dir) throws Exception {
+
+		FileSystem hdfs = FileSystem.get(new Configuration());
+		RemoteIterator<LocatedFileStatus> Files;
+		ArrayList<String> fileNames = new ArrayList<>();
+
+ try {
+ Path exportPath = new Path(hdfs.getUri() + dir);
+ Files = hdfs.listFiles(exportPath, false);
+ while (Files.hasNext()) {
+ String fileName = Files.next().getPath().toString();
+ fileNames.add(fileName);
+ }
+
+ hdfs.close();
+ } catch (Exception e) {
+ log.error("HDFS file path with exported data does not exist : " + new Path(hdfs.getUri() + logPath));
+ throw new Exception("HDFS file path with exported data does not exist : " + logPath, e);
+ }
+
+ return fileNames;
+ }
+
+ private String readHDFSFile(String filename) throws Exception {
+ String result;
+ try {
+
+ FileSystem fs = FileSystem.get(new Configuration());
+ // log.info("reading file : " + filename);
+
+ BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(filename))));
+
+ StringBuilder sb = new StringBuilder();
+ String line = br.readLine();
+
+ while (line != null) {
+ if (!line.equals("[]")) {
+ sb.append(line);
+ }
+ // sb.append(line);
+ line = br.readLine();
+ }
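+			// Each page was appended as a separate JSON array; splicing the "][" boundary
+			// into "," merges them all into one parseable array.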
+ result = sb.toString().replace("][{\"idSite\"", ",{\"idSite\"");
+ if (result.equals("")) {
+ result = "[]";
+ }
+
+ // fs.close();
+ } catch (Exception e) {
+ log.error(e);
+ throw new Exception(e);
+ }
+
+ return result;
+ }
+
+ private Connection getConnection() throws SQLException {
+ return ConnectDB.getConnection();
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ReadCounterRobotsList.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ReadCounterRobotsList.java
new file mode 100644
index 000000000..e840e6e6c
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ReadCounterRobotsList.java
@@ -0,0 +1,56 @@
+
+package eu.dnetlib.usagestats.export;
+
+/**
+ * @author dpie
+ */
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+
+import org.json.JSONException;
+import org.json.simple.JSONArray;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+public class ReadCounterRobotsList {
+
+ private final ArrayList<String> robotsPatterns;
+ private final String COUNTER_ROBOTS_URL;
+
+ public ReadCounterRobotsList(String url) throws IOException, JSONException, ParseException {
+ COUNTER_ROBOTS_URL = url;
+ robotsPatterns = readRobotsPartners(COUNTER_ROBOTS_URL);
+ }
+
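+ /**
+ * Downloads and parses the COUNTER robots list: a JSON array whose entries carry a "pattern" field
+ * holding a user-agent regex, e.g. (illustrative) [{"pattern": "bot"}, {"pattern": "spider"}].
+ */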
+ private ArrayList<String> readRobotsPartners(String url) throws MalformedURLException, IOException, ParseException {
+ ArrayList<String> patterns = new ArrayList<>();
+ JSONParser parser = new JSONParser();
+ try (BufferedReader reader = new BufferedReader(
+ new InputStreamReader(new URL(url).openStream(), Charset.forName("ISO-8859-1")))) {
+ JSONArray jsonArray = (JSONArray) parser.parse(reader);
+ for (Object aJsonArray : jsonArray) {
+ org.json.simple.JSONObject jsonObjectRow = (org.json.simple.JSONObject) aJsonArray;
+ // escape backslashes so the patterns survive later embedding in SQL strings
+ patterns.add(jsonObjectRow.get("pattern").toString().replace("\\", "\\\\"));
+ }
+ }
+ return patterns;
+ }
+
+ public ArrayList<String> getRobotsPatterns() {
+ return robotsPatterns;
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/SarcStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/SarcStats.java
new file mode 100644
index 000000000..83fbc0205
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/SarcStats.java
@@ -0,0 +1,255 @@
+
+package eu.dnetlib.usagestats.export;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.net.URLConnection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+
+import org.apache.log4j.Logger;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+
+/**
+ * Created by dpie
+ */
+public class SarcStats {
+
+ private Statement stmt = null;
+
+ private final Logger log = Logger.getLogger(this.getClass());
+
+ public SarcStats() throws Exception {
+ createTables();
+ }
+
+ private void createTables() throws Exception {
+ try {
+
+ stmt = ConnectDB.getConnection().createStatement();
+ String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
+ stmt.executeUpdate(sqlCreateTableSushiLog);
+
+ // String sqlCopyPublicSushiLog="INSERT INTO sushilog SELECT * FROM public.sushilog;";
+ // stmt.executeUpdate(sqlCopyPublicSushiLog);
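+ // the rule below turns duplicate inserts into no-ops, so re-harvesting an
+ // already-processed period leaves sushilog unchanged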
+ String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
+ + " ON INSERT TO sushilog "
+ + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
+ + "sushilog.rid, sushilog.date "
+ + "FROM sushilog "
+ + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
+ stmt.executeUpdate(sqlcreateRuleSushiLog);
+ String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
+ stmt.executeUpdate(createSushiIndex);
+
+ stmt.close();
+ ConnectDB.getConnection().close();
+ log.info("Sushi Tables Created");
+ } catch (Exception e) {
+ log.error("Failed to create tables: " + e);
+ throw new Exception("Failed to create tables: " + e.toString(), e);
+ }
+ }
+
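+ /**
+ * Harvests the AR1 reports of all known SARC-OJS SUSHI-Lite endpoints, one (endpoint URL, ISSN) pair per journal.
+ */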
+ public void processSarc() throws Exception {
+ processARReport("https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
+ processARReport("https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
+ processARReport("https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
+ processARReport("https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
+ processARReport("https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
+ processARReport("https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
+ processARReport("https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
+ processARReport("https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
+ processARReport("https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
+ processARReport("https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
+ processARReport("https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
+ processARReport("https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
+ processARReport("https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
+ processARReport("https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
+ processARReport("https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
+ processARReport("https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
+ processARReport("https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");
+ }
+
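+ /**
+ * Maps the harvested sushilog rows onto OpenAIRE datasource and result identifiers (matching the
+ * reported DOIs against result_pids) and loads the monthly ft_total counts into downloads_stats.
+ */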
+ public void sarcStats() throws Exception {
+ stmt = ConnectDB.getConnection().createStatement();
+ ConnectDB.getConnection().setAutoCommit(false);
+
+ // String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date)
+ // ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' INTO
+ // downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND
+ // s.rid=ro.orid AND metric_type='ft_total'";
+ String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, public.datasource_oids d, public.datasource_results dr, public.result_pids ro WHERE d.orid LIKE '%' || s.repository || '%' AND dr.id=d.id AND dr.result=ro.id AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS';";
+ stmt.executeUpdate(sql);
+
+ stmt.close();
+ ConnectDB.getConnection().commit();
+ ConnectDB.getConnection().close();
+ }
+
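+ /**
+ * Harvests the AR1 report for one journal, month by month, resuming from the latest date already
+ * recorded in sushilog for this repository (or from 2016-01 on the first run). The response is
+ * unwrapped along sc:ReportResponse / sc:Report / c:Report / c:Customer / c:ReportItems, and each
+ * report item contributes its DOI together with the monthly metric counts.
+ */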
+ public void processARReport(String url, String issn) throws Exception {
+ log.info("Processing SARC! issn: " + issn + " with url: " + url);
+ ConnectDB.getConnection().setAutoCommit(false);
+
+ // lowercase "yyyy" is the calendar year; uppercase "YYYY" is the week-based year and breaks at year boundaries
+ SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
+
+ Calendar start = Calendar.getInstance();
+ start.set(Calendar.YEAR, 2016);
+ start.set(Calendar.MONTH, Calendar.JANUARY);
+ // start.setTime(simpleDateFormat.parse("2016-01"));
+
+ Calendar end = Calendar.getInstance();
+ end.add(Calendar.DAY_OF_MONTH, -1);
+
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+ PreparedStatement st = ConnectDB
+ .getConnection()
+ .prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
+ st.setString(1, issn);
+ ResultSet rs_date = st.executeQuery();
+ while (rs_date.next()) {
+ if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
+ && !rs_date.getString(1).equals("")) {
+ start.setTime(sdf.parse(rs_date.getString(1)));
+ }
+ }
+ rs_date.close();
+
+ PreparedStatement preparedStatement = ConnectDB
+ .getConnection()
+ .prepareStatement(
+ "INSERT INTO sushilog (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
+ int batch_size = 0;
+
+ while (start.before(end)) {
+ String reportUrl = url + "GetReport/?Report=AR1&Format=json&BeginDate="
+ + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime());
+ // System.out.println(reportUrl);
+ start.add(Calendar.MONTH, 1);
+
+ String text = getJson(reportUrl);
+ if (text == null) {
+ continue;
+ }
+
+ /*
+ * PrintWriter wr = new PrintWriter(new FileWriter("logs/" + simpleDateFormat.format(start.getTime()) +
+ * ".json")); wr.print(text); wr.close();
+ */
+ JSONParser parser = new JSONParser();
+ JSONObject jsonObject = (JSONObject) parser.parse(text);
+ jsonObject = (JSONObject) jsonObject.get("sc:ReportResponse");
+ if (jsonObject == null) {
+ continue;
+ }
+ jsonObject = (JSONObject) jsonObject.get("sc:Report");
+ if (jsonObject == null) {
+ continue;
+ }
+ jsonObject = (JSONObject) jsonObject.get("c:Report");
+ jsonObject = (JSONObject) jsonObject.get("c:Customer");
+ Object obj = jsonObject.get("c:ReportItems");
+ JSONArray jsonArray = new JSONArray();
+ if (obj instanceof JSONObject) {
+ jsonArray.add(obj);
+ } else {
+ jsonArray = (JSONArray) obj;
+ // jsonArray = (JSONArray) jsonObject.get("c:ReportItems");
+ }
+ if (jsonArray == null) {
+ continue;
+ }
+
+ String rid = "";
+ for (Object aJsonArray : jsonArray) {
+ JSONObject jsonObjectRow = (JSONObject) aJsonArray;
+ JSONArray itemIdentifier = new JSONArray();
+ obj = jsonObjectRow.get("c:ItemIdentifier");
+ if (obj instanceof JSONObject) {
+ itemIdentifier.add(obj);
+ } else {
+ // JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier");
+ itemIdentifier = (JSONArray) obj;
+ }
+ for (Object identifier : itemIdentifier) {
+ JSONObject doi = (JSONObject) identifier;
+ if (doi.get("c:Type").toString().equals("DOI")) {
+ rid = doi.get("c:Value").toString();
+ // System.out.println("DOI: " + rid);
+ break;
+ }
+ }
+ if (rid.isEmpty()) {
+ continue;
+ }
+
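+ // the report is requested for a single month (BeginDate equals EndDate), so a single
+ // c:ItemPerformance object is expected here rather than an array of periods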
+ JSONObject performance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
+ JSONObject periodObj = (JSONObject) performance.get("c:Period");
+ String period = periodObj.get("c:Begin").toString();
+ JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
+ String type = instanceObj.get("c:MetricType").toString();
+ String count = instanceObj.get("c:Count").toString();
+ // System.out.println(rid + " : " + period + " : " + count);
+
+ preparedStatement.setString(1, "SARC-OJS");
+ preparedStatement.setString(2, issn);
+ // preparedStatement.setString(2, url);
+ preparedStatement.setString(3, rid);
+ preparedStatement.setString(4, period);
+ preparedStatement.setString(5, type);
+ preparedStatement.setInt(6, Integer.parseInt(count));
+ preparedStatement.addBatch();
+ batch_size++;
+ if (batch_size == 10000) {
+ preparedStatement.executeBatch();
+ ConnectDB.getConnection().commit();
+ batch_size = 0;
+ }
+ }
+ }
+
+ preparedStatement.executeBatch();
+ ConnectDB.getConnection().commit();
+ ConnectDB.getConnection().close();
+ }
+
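+ /**
+ * Fetches the given URL and returns the raw response body, or null on any failure;
+ * the caller simply skips months that could not be retrieved.
+ */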
+ private String getJson(String url) {
+ // String cred=username+":"+password;
+ // String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
+ try {
+ URL website = new URL(url);
+ URLConnection connection = website.openConnection();
+ // connection.setRequestProperty ("Authorization", "Basic "+encoded);
+ StringBuilder response;
+ try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
+ response = new StringBuilder();
+ String inputLine;
+ while ((inputLine = in.readLine()) != null) {
+ response.append(inputLine);
+ response.append("\n");
+ }
+ }
+ return response.toString();
+ } catch (Exception e) {
+ log.error("Failed to get URL: " + e);
+ // System.out.println("Failed to get URL: " + e);
+ return null;
+ // throw new Exception("Failed to get URL: " + e.toString(), e);
+ }
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/UsageStatsExporter.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/UsageStatsExporter.java
new file mode 100644
index 000000000..436b87d87
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/UsageStatsExporter.java
@@ -0,0 +1,57 @@
+
+package eu.dnetlib.usagestats.export;
+
+import java.io.InputStream;
+import java.util.Properties;
+
+import org.apache.log4j.Logger;
+
+public class UsageStatsExporter {
+
+ private Logger log = Logger.getLogger(this.getClass());
+ private Properties properties;
+
+ public UsageStatsExporter(Properties properties) {
+ this.properties = properties;
+ }
+
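+ /**
+ * Runs the full usage-stats pipeline: download the piwik logs, process them into the usage-stats
+ * database, harvest the IRUS-UK and SARC-OJS reports, and finalize the statistics tables.
+ */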
+ public void export() throws Exception {
+
+ // read workflow parameters
+ String matomoAuthToken = properties.getProperty("matomo_AuthToken");
+ String matomoBaseURL = properties.getProperty("matomo_BaseUrl");
+ String repoLogPath = properties.getProperty("repo_LogPath");
+ String portalLogPath = properties.getProperty("portal_LogPath");
+ String portalMatomoID = properties.getProperty("portal_MatomoID");
+ String irusUKBaseURL = properties.getProperty("IRUS_UK_BaseUrl");
+
+ // connect to DB
+ ConnectDB.init(properties);
+
+ PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
+ piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);
+
+ /*
+ * Create DB tables, insert/update statistics
+ */
+ PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
+ piwikstatsdb.setCounterRobotsURL(properties.getProperty("COUNTER_robots_Url"));
+ piwikstatsdb.processLogs();
+ log.info("process logs done");
+
+ IrusStats irusstats = new IrusStats(irusUKBaseURL);
+ irusstats.processIrusRRReport();
+ irusstats.irusStats();
+ log.info("irus done");
+
+ SarcStats sarcStats = new SarcStats();
+ sarcStats.processSarc();
+ sarcStats.sarcStats();
+ log.info("sarc done");
+
+ // finalize usagestats
+ piwikstatsdb.finalizeStats();
+ log.info("finalized stats");
+ }
+}
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usage-stats/oozie_app/config-default.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usage-stats/oozie_app/config-default.xml
new file mode 100644
index 000000000..ba7002cff
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usage-stats/oozie_app/config-default.xml
@@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>${jobTracker}</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>${nameNode}</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>oozie.action.sharelib.for.spark</name>
+        <value>spark2</value>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>oozie.wf.workflow.notification.url</name>
+        <value>{serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status</value>
+    </property>
+</configuration>
diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usage-stats/oozie_app/workflow.xml b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usage-stats/oozie_app/workflow.xml
new file mode 100644
index 000000000..70d4dcffc
--- /dev/null
+++ b/dhp-workflows/dhp-usage-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/usage-stats/oozie_app/workflow.xml
@@ -0,0 +1,76 @@
+<workflow-app name="usage-stats-update" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>stats_db_name</name>
+            <description>the target stats database name</description>
+        </property>
+        <property>
+            <name>openaire_db_name</name>
+            <description>the original graph database name</description>
+        </property>
+        <property>
+            <name>external_stats_db_name</name>
+            <value>stats_ext</value>
+            <description>the external stats that should be added since they are not included in the graph database</description>
+        </property>
+        <property>
+            <name>hiveMetastoreUris</name>
+            <description>hive server metastore URIs</description>
+        </property>
+        <property>
+            <name>hiveJdbcUrl</name>
+            <description>hive server jdbc url</description>
+        </property>
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>hive.metastore.uris</name>
+                <value>${hiveMetastoreUris}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="usageStatsExport"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="usageStatsExport">
+        <java>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <configuration>
+                <property>
+                    <name>mapred.job.queue.name</name>
+                    <value>${queueName}</value>
+                </property>
+            </configuration>
+            <main-class>eu.dnetlib.oa.graph.usage-stats.export.UsageStatsExporter</main-class>
+            <arg>org.apache.oozie.test.MyTest</arg>
+            <arg>${outputFileName}</arg>
+        </java>
+        <ok to="updateStatsDB"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="updateStatsDB">
+        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+            <jdbc-url>${hiveJdbcUrl}</jdbc-url>
+            <param>stats_db_name=${stats_db_name}</param>
+            <param>openaire_db_name=${openaire_db_name}</param>
+        </hive2>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+</workflow-app>
diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml
index ea3433903..ca82cf1fa 100644
--- a/dhp-workflows/pom.xml
+++ b/dhp-workflows/pom.xml
@@ -26,6 +26,7 @@
dhp-dedup-scholexplorer
dhp-graph-provision-scholexplorer
dhp-stats-update
+ dhp-usage-stats-update
dhp-broker-events