forked from D-Net/dnet-hadoop
Adding the main java files, the directory structure and main workflow file
This commit is contained in:
parent ac0da5a7ee
commit af62b14f91
@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>dhp-workflows</artifactId>
        <groupId>eu.dnetlib.dhp</groupId>
        <version>1.1.7-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>dhp-usage-stats-update</artifactId>
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>pl.project13.maven</groupId>
                <artifactId>git-commit-id-plugin</artifactId>
                <version>2.1.11</version>
                <configuration>
                    <failOnNoGitDirectory>false</failOnNoGitDirectory>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
@ -0,0 +1,66 @@
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package eu.dnetlib.usagestats.export;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;

import org.apache.log4j.Logger;

/*
 * @author dpie
 */
public abstract class ConnectDB {

    private static Connection DB_CONNECTION;

    private static String dbURL;
    private static String dbUsername;
    private static String dbPassword;
    private static String defaultDBSchema;
    private final static Logger log = Logger.getLogger(ConnectDB.class);

    static void init(Properties properties) throws ClassNotFoundException {

        dbURL = properties.getProperty("Stats_db_Url");
        dbUsername = properties.getProperty("Stats_db_User");
        dbPassword = properties.getProperty("Stats_db_Pass");
        defaultDBSchema = properties.getProperty("Stats_db_Schema");

        // Load the JDBC driver class named in the properties file
        Class.forName(properties.getProperty("Stats_db_Driver"));
    }

    public static Connection getConnection() throws SQLException {
        // Reuse the cached connection while it is still open; otherwise reconnect
        if (DB_CONNECTION != null && !DB_CONNECTION.isClosed()) {
            return DB_CONNECTION;
        } else {
            DB_CONNECTION = connect();
            return DB_CONNECTION;
        }
    }

    private static Connection connect() throws SQLException {
        Connection connection = DriverManager.getConnection(dbURL, dbUsername, dbPassword);
        Statement stmt = connection.createStatement();
        // Make unqualified table names resolve inside the configured schema
        String sqlSetSearchPath = "SET search_path TO " + defaultDBSchema + ";";
        stmt.executeUpdate(sqlSetSearchPath);
        stmt.close();

        log.debug("Opened database successfully");

        return connection;
    }

}
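For orientation, a minimal usage sketch, not part of the commit: ConnectDB caches a single shared Connection, so callers initialize it once with the loaded properties and then fetch the same connection wherever a statement is needed. The table name below is hypothetical.

// illustrative only (inside a method that throws Exception)
Properties prop = new Properties();
prop.load(ConnectDB.class.getClassLoader().getResourceAsStream("usagestats.properties"));
ConnectDB.init(prop); // loads credentials and the JDBC driver class
try (Statement stmt = ConnectDB.getConnection().createStatement()) {
    stmt.executeUpdate("CREATE TABLE IF NOT EXISTS example_table(id INT);"); // hypothetical table
}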
@ -0,0 +1,43 @@
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package eu.dnetlib.usagestats.export;

import java.io.InputStream;
import java.util.Properties;

/**
 * @author dpie
 */
public class ExecuteWorkflow {

    public static void main(String[] args) throws Exception {

        // Load the workflow configuration bundled on the classpath
        Properties prop = new Properties();
        InputStream propertiesInputStream = UsageStatsExporter.class
            .getClassLoader()
            .getResourceAsStream("usagestats.properties");
        prop.load(propertiesInputStream);

        UsageStatsExporter usagestatsExport = new UsageStatsExporter(prop);
        usagestatsExport.export();
    }
}
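ExecuteWorkflow expects a usagestats.properties file on the classpath. A sketch of the keys this commit reads follows; the key names are taken from ConnectDB and UsageStatsExporter, every value is a placeholder, and the PostgreSQL driver is an assumption suggested by the PostgreSQL-specific SQL (rules, search_path) used elsewhere in the commit.

# database connection, read by ConnectDB.init()
Stats_db_Url=jdbc:postgresql://stats-db-host:5432/stats
Stats_db_User=dbuser
Stats_db_Pass=dbpass
Stats_db_Schema=usagestats
Stats_db_Driver=org.postgresql.Driver
# Matomo/Piwik harvesting, read by UsageStatsExporter.export()
matomo_AuthToken=changeme
matomo_BaseUrl=analytics.example.org
repo_LogPath=/tmp/usagestats/repolog
portal_LogPath=/tmp/usagestats/portallog
portal_MatomoID=109
# external sources
COUNTER_robots_Url=https://example.org/COUNTER_Robots_list.json
IRUS_UK_BaseUrl=https://irus.jisc.ac.uk/api/sushilite/v1_7/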
@ -0,0 +1,431 @@
package eu.dnetlib.usagestats.export;

import java.io.*;
// import java.io.BufferedReader;
// import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

/**
 * Harvests download statistics from IRUS-UK via its SUSHI-Lite API and loads them into the stats database.
 * Created by dpie on 20/01/2020.
 *
 * @author dpie
 */
public class IrusStats {

    private String irusUKURL;

    // private Connection conn = null;
    // private Statement stmt = null;

    private final Logger log = Logger.getLogger(this.getClass());

    public IrusStats(String irusUKURL) throws Exception {
        this.irusUKURL = irusUKURL;
        createTables();
        createTmpTables();
    }

    private void createTables() throws Exception {
        try {
            Statement stmt = ConnectDB.getConnection().createStatement();
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
            stmt.executeUpdate(sqlCreateTableSushiLog);
            // silently skip inserts that would violate the primary key instead of failing the batch
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
                + " ON INSERT TO sushilog "
                + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
                + "sushilog.rid, sushilog.date "
                + "FROM sushilog "
                + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
            stmt.executeUpdate(sqlcreateRuleSushiLog);
            String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
            stmt.executeUpdate(createSushiIndex);

            stmt.close();
            ConnectDB.getConnection().close();
            log.info("Sushi Tables Created");
        } catch (Exception e) {
            log.error("Failed to create tables: " + e);
            throw new Exception("Failed to create tables: " + e.toString(), e);
        }
    }

    private void createTmpTables() throws Exception {
        try {
            Statement stmt = ConnectDB.getConnection().createStatement();
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilogtmp(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
            stmt.executeUpdate(sqlCreateTableSushiLog);

            // stmt.executeUpdate("CREATE TABLE IF NOT EXISTS public.sushilog AS TABLE sushilog;");
            // String sqlCopyPublicSushiLog = "INSERT INTO sushilog SELECT * FROM public.sushilog;";
            // stmt.executeUpdate(sqlCopyPublicSushiLog);
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
                + " ON INSERT TO sushilogtmp "
                + " WHERE (EXISTS ( SELECT sushilogtmp.source, sushilogtmp.repository,"
                + "sushilogtmp.rid, sushilogtmp.date "
                + "FROM sushilogtmp "
                + "WHERE sushilogtmp.source = new.source AND sushilogtmp.repository = new.repository AND sushilogtmp.rid = new.rid AND sushilogtmp.date = new.date AND sushilogtmp.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
            stmt.executeUpdate(sqlcreateRuleSushiLog);

            stmt.close();
            ConnectDB.getConnection().close();
            log.info("Sushi Tmp Tables Created");
        } catch (Exception e) {
            log.error("Failed to create tables: " + e);
            throw new Exception("Failed to create tables: " + e.toString(), e);
        }
    }

    public void irusStats() throws Exception {
        Statement stmt = ConnectDB.getConnection().createStatement();
        ConnectDB.getConnection().setAutoCommit(false);

        // earlier variants of this query, kept for reference:
        // String sql = "INSERT INTO sushi_result_downloads SELECT s.source, d.id AS repository, ro.id, s.date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
        // String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count INTO downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total'";
        // String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.oai=ro.orid AND metric_type='ft_total';";
        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilogtmp s, public.datasource_oids d, public.result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total' AND s.source='IRUS-UK';";
        stmt.executeUpdate(sql);

        sql = "Insert into sushilog select * from sushilogtmp;";
        stmt.executeUpdate(sql);

        ConnectDB.getConnection().commit();
        ConnectDB.getConnection().close();
    }

    public void processIrusRRReport() throws Exception {
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
        // String reportUrl = "https://irus.jisc.ac.uk" +
        //     "/api/sushilite/v1_7/GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate=" +
        //     simpleDateFormat.format(new Date()) +
        //     "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";
        String reportUrl = irusUKURL + "GetReport/?Report=RR1&Release=4&RequestorID=OpenAIRE&BeginDate=2016-01&EndDate="
            + simpleDateFormat.format(new Date())
            + "&RepositoryIdentifier=&ItemDataType=&NewJiscBand=&Granularity=Monthly&Callback=";

        log.info("Getting Irus report: " + reportUrl);

        String text = getJson(reportUrl, "", "");

        log.info("Report: " + text);

        // the repository report (RR1) enumerates OpenDOAR repositories; each one is then harvested individually
        JSONParser parser = new JSONParser();
        JSONObject jsonObject = (JSONObject) parser.parse(text);
        jsonObject = (JSONObject) jsonObject.get("ReportResponse");
        jsonObject = (JSONObject) jsonObject.get("Report");
        jsonObject = (JSONObject) jsonObject.get("Report");
        jsonObject = (JSONObject) jsonObject.get("Customer");
        JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
        int i = 0;
        for (Object aJsonArray : jsonArray) {
            JSONObject jsonObjectRow = (JSONObject) aJsonArray;
            JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
            for (Object identifier : itemIdentifier) {
                JSONObject opendoar = (JSONObject) identifier;
                if (opendoar.get("Type").toString().equals("OpenDOAR")) {
                    log.info(i + ": " + opendoar.get("Value").toString());
                    i++;
                    processIrusIRReport(opendoar.get("Value").toString());
                    break;
                }
            }
            // break;
        }
    }

    private void processIrusIRReport(String opendoar) throws Exception {
        log.info("Processing OpenDOAR repository: " + opendoar);
        ConnectDB.getConnection().setAutoCommit(false);

        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");

        Calendar start = Calendar.getInstance();
        start.set(Calendar.YEAR, 2016);
        start.set(Calendar.MONTH, Calendar.JANUARY);
        // start.setTime(simpleDateFormat.parse("2016-01"));

        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);

        // resume from the latest month already stored for this repository
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        PreparedStatement st = ConnectDB
            .getConnection()
            .prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
        st.setString(1, "opendoar____::" + opendoar);
        ResultSet rs_date = st.executeQuery();
        while (rs_date.next()) {
            if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
                && !rs_date.getString(1).equals("")) {
                start.setTime(sdf.parse(rs_date.getString(1)));
            }
        }
        rs_date.close();
        PreparedStatement preparedStatement = ConnectDB
            .getConnection()
            .prepareStatement(
                "INSERT INTO sushilogtmp (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
        int batch_size = 0;

        while (start.before(end)) {
            // log.info("date: " + simpleDateFormat.format(start.getTime()));
            String reportUrl = this.irusUKURL + "GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate="
                + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime())
                + "&RepositoryIdentifier=opendoar%3A" + opendoar
                + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
            start.add(Calendar.MONTH, 1);

            String text = getJson(reportUrl, "", "");
            if (text == null) {
                continue;
            }

            JSONParser parser = new JSONParser();
            JSONObject jsonObject = (JSONObject) parser.parse(text);
            jsonObject = (JSONObject) jsonObject.get("ReportResponse");
            jsonObject = (JSONObject) jsonObject.get("Report");
            jsonObject = (JSONObject) jsonObject.get("Report");
            jsonObject = (JSONObject) jsonObject.get("Customer");
            JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
            if (jsonArray == null) {
                continue;
            }
            String oai = "";
            for (Object aJsonArray : jsonArray) {
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
                JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
                for (Object identifier : itemIdentifier) {
                    JSONObject oaiPmh = (JSONObject) identifier;
                    if (oaiPmh.get("Type").toString().equals("OAI")) {
                        oai = oaiPmh.get("Value").toString();
                        // System.out.println("OAI: " + oai);
                        break;
                    }
                }

                JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
                String period;
                String type;
                String count;
                for (Object perf : itemPerformance) {
                    JSONObject performance = (JSONObject) perf;
                    JSONObject periodObj = (JSONObject) performance.get("Period");
                    period = periodObj.get("Begin").toString();
                    JSONObject instanceObj = (JSONObject) performance.get("Instance");
                    type = instanceObj.get("MetricType").toString();
                    count = instanceObj.get("Count").toString();
                    // System.out.println(oai + " : " + period + " : " + count);

                    preparedStatement.setString(1, "IRUS-UK");
                    preparedStatement.setString(2, "opendoar____::" + opendoar);
                    preparedStatement.setString(3, oai);
                    preparedStatement.setString(4, period);
                    preparedStatement.setString(5, type);
                    preparedStatement.setInt(6, Integer.parseInt(count));
                    preparedStatement.addBatch();
                    batch_size++;
                    if (batch_size == 10000) {
                        preparedStatement.executeBatch();
                        ConnectDB.getConnection().commit();
                        batch_size = 0;
                    }
                }
                // break;
            }
            // break;
        }

        preparedStatement.executeBatch();
        ConnectDB.getConnection().commit();
        ConnectDB.getConnection().close();
    }

    public void processIrusIRReport(String opendoar, String startDate) throws Exception {
        ConnectDB.getConnection().setAutoCommit(false);

        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");

        Calendar start = Calendar.getInstance();
        start.set(Calendar.YEAR, 2016);
        start.set(Calendar.MONTH, Calendar.JANUARY);
        // start.setTime(simpleDateFormat.parse("2016-01"));

        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        start.setTime(sdf.parse(startDate));

        // per-repository shadow table plus a duplicate-suppressing rule
        String createTablesQuery = "-- Table: shadow.sushilog" + opendoar + "\n"
            + "\n"
            + "-- DROP TABLE shadow.sushilog" + opendoar + ";\n"
            + "\n"
            + "CREATE TABLE shadow.sushilog" + opendoar + "\n"
            + "(\n"
            + "    source text COLLATE pg_catalog.\"default\" NOT NULL,\n"
            + "    repository text COLLATE pg_catalog.\"default\" NOT NULL,\n"
            + "    rid text COLLATE pg_catalog.\"default\" NOT NULL,\n"
            + "    date text COLLATE pg_catalog.\"default\" NOT NULL,\n"
            + "    metric_type text COLLATE pg_catalog.\"default\" NOT NULL,\n"
            + "    count integer,\n"
            + "    CONSTRAINT sushilog" + opendoar + "_pkey PRIMARY KEY (source, repository, rid, date, metric_type)\n"
            + "        USING INDEX TABLESPACE index_storage\n"
            + ")\n"
            + "\n"
            + "TABLESPACE pg_default;\n"
            + "\n"
            + "ALTER TABLE shadow.sushilog" + opendoar + "\n"
            + "    OWNER to sqoop;\n"
            + "\n"
            + "-- Rule: ignore_duplicate_inserts ON shadow.sushilog" + opendoar + "\n"
            + "\n"
            + "-- DROP Rule ignore_duplicate_inserts ON shadow.sushilog" + opendoar + ";\n"
            + "\n"
            + "CREATE OR REPLACE RULE ignore_duplicate_inserts AS\n"
            + "    ON INSERT TO shadow.sushilog" + opendoar + "\n"
            + "    WHERE (EXISTS ( SELECT sushilog" + opendoar + ".source,\n"
            + "            sushilog" + opendoar + ".repository,\n"
            + "            sushilog" + opendoar + ".rid,\n"
            + "            sushilog" + opendoar + ".date\n"
            + "           FROM sushilog" + opendoar + "\n"
            + "          WHERE sushilog" + opendoar + ".source = new.source AND sushilog" + opendoar
            + ".repository = new.repository AND sushilog" + opendoar + ".rid = new.rid AND sushilog" + opendoar
            + ".date = new.date AND sushilog" + opendoar + ".metric_type = new.metric_type))\n"
            + "    DO INSTEAD\n"
            + "NOTHING;";

        Statement stCreateTables = ConnectDB.getConnection().createStatement();
        stCreateTables.execute(createTablesQuery);
        ConnectDB.getConnection().commit();

        PreparedStatement preparedStatement = ConnectDB
            .getConnection()
            .prepareStatement(
                "INSERT INTO sushilog" + opendoar
                    + " (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
        int batch_size = 0;

        while (start.before(end)) {
            // log.info("date: " + simpleDateFormat.format(start.getTime()));
            String reportUrl = "https://irus.jisc.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate="
                + simpleDateFormat.format(start.getTime()) + "&EndDate=2019-10-31&RepositoryIdentifier=opendoar%3A"
                + opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
            start.add(Calendar.MONTH, 1);

            String text = getJson(reportUrl, "", "");
            if (text == null) {
                continue;
            }

            JSONParser parser = new JSONParser();
            JSONObject jsonObject = (JSONObject) parser.parse(text);
            jsonObject = (JSONObject) jsonObject.get("ReportResponse");
            jsonObject = (JSONObject) jsonObject.get("Report");
            jsonObject = (JSONObject) jsonObject.get("Report");
            jsonObject = (JSONObject) jsonObject.get("Customer");
            JSONArray jsonArray = (JSONArray) jsonObject.get("ReportItems");
            if (jsonArray == null) {
                continue;
            }
            String oai = "";
            for (Object aJsonArray : jsonArray) {
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
                JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("ItemIdentifier");
                for (Object identifier : itemIdentifier) {
                    JSONObject oaiPmh = (JSONObject) identifier;
                    if (oaiPmh.get("Type").toString().equals("OAI")) {
                        oai = oaiPmh.get("Value").toString();
                        // System.out.println("OAI: " + oai);
                        break;
                    }
                }

                JSONArray itemPerformance = (JSONArray) jsonObjectRow.get("ItemPerformance");
                String period;
                String type;
                String count;
                for (Object perf : itemPerformance) {
                    JSONObject performance = (JSONObject) perf;
                    JSONObject periodObj = (JSONObject) performance.get("Period");
                    period = periodObj.get("Begin").toString();
                    JSONObject instanceObj = (JSONObject) performance.get("Instance");
                    type = instanceObj.get("MetricType").toString();
                    count = instanceObj.get("Count").toString();
                    // System.out.println(oai + " : " + period + " : " + count);

                    preparedStatement.setString(1, "IRUS-UK");
                    preparedStatement.setString(2, "opendoar____::" + opendoar);
                    preparedStatement.setString(3, oai);
                    preparedStatement.setString(4, period);
                    preparedStatement.setString(5, type);
                    preparedStatement.setInt(6, Integer.parseInt(count));
                    preparedStatement.addBatch();
                    batch_size++;
                    if (batch_size == 10000) {
                        preparedStatement.executeBatch();
                        ConnectDB.getConnection().commit();
                        batch_size = 0;
                    }
                }
                // break;
            }
            // break;
        }

        preparedStatement.executeBatch();
        ConnectDB.getConnection().commit();
        ConnectDB.getConnection().close();
    }

    private String getJson(String url, String username, String password) throws Exception {
        // String cred=username+":"+password;
        // String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
        try {
            URL website = new URL(url);
            URLConnection connection = website.openConnection();
            // connection.setRequestProperty ("Authorization", "Basic "+encoded);
            StringBuilder response;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                response = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                    response.append("\n");
                }
            }
            return response.toString();
        } catch (Exception e) {
            log.error("Failed to get URL", e);
            return null;
        }
    }
}
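For orientation, the parser above unwraps ReportResponse -> Report -> Report -> Customer -> ReportItems and then reads ItemIdentifier and ItemPerformance from each item. A hand-written sketch of that envelope follows; only the keys the code dereferences are shown, and all values are illustrative:

{"ReportResponse": {"Report": {"Report": {"Customer": {"ReportItems": [
  {"ItemIdentifier": [{"Type": "OAI", "Value": "oai:repo.example.org:1234"},
                      {"Type": "OpenDOAR", "Value": "123"}],
   "ItemPerformance": [{"Period": {"Begin": "2016-01-01"},
                        "Instance": {"MetricType": "ft_total", "Count": "42"}}]}
]}}}}}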
@ -0,0 +1,132 @@
package eu.dnetlib.usagestats.export;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

public class PiwikDownloadLogs {

    private final String piwikUrl;
    private Date startDate;
    private final String tokenAuth;

    /*
     * The Piwik's API method
     */
    private final String APImethod = "?module=API&method=Live.getLastVisitsDetails";
    private final String format = "&format=json";

    private final Logger log = Logger.getLogger(this.getClass());

    public PiwikDownloadLogs(String piwikUrl, String tokenAuth) {
        this.piwikUrl = piwikUrl;
        this.tokenAuth = tokenAuth;
    }

    private String getPiwikLogUrl() {
        return "https://" + piwikUrl + "/";
    }

    private String getJson(String url) throws Exception {
        try {
            URL website = new URL(url);
            URLConnection connection = website.openConnection();

            // connection.setRequestProperty ("Authorization", "Basic "+encoded);
            StringBuilder response;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                response = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                    response.append("\n");
                }
            }
            return response.toString();
        } catch (Exception e) {
            log.error("Failed to get URL: " + e);
            throw new Exception("Failed to get URL: " + e.toString(), e);
        }
    }

    public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {

        Statement statement = ConnectDB.getConnection().createStatement();

        // one Matomo site id per datasource that has tracking enabled
        ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
        while (rs.next()) {
            int siteId = rs.getInt(1);
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");

            Calendar start = Calendar.getInstance();
            start.set(Calendar.YEAR, 2016);
            start.set(Calendar.MONTH, Calendar.MARCH);
            // start.setTime(simpleDateFormat.parse("2016-01"));

            Calendar end = Calendar.getInstance();
            end.add(Calendar.DAY_OF_MONTH, -1);

            // resume from the latest timestamp already loaded for this site
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
            PreparedStatement st = ConnectDB
                .getConnection()
                .prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
            st.setInt(1, siteId);

            ResultSet rs_date = st.executeQuery();
            while (rs_date.next()) {
                if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
                    start.setTime(sdf.parse(rs_date.getString(1)));
                }
            }
            rs_date.close();

            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
                log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));

                String period = "&period=day&date=" + sdf.format(date);
                String outFolder = "";
                // portal siteId = 109;
                if (siteId == Integer.parseInt(portalMatomoID)) {
                    outFolder = portalLogPath;
                } else {
                    outFolder = repoLogsPath;
                }
                FileSystem fs = FileSystem.get(new Configuration());
                FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);

                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
                String content = "";

                int i = 0;

                // page through the day's visits until the API returns an empty array
                while (!content.equals("[]\n")) {
                    String apiUrl = baseApiUrl;

                    if (i > 0) {
                        apiUrl += "&filter_offset=" + (i * 1000);
                    }

                    content = getJson(apiUrl);

                    fin.write(content.getBytes());

                    i++;
                }
                fin.close();
            }
        }
    }
}
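Concretely, the request GetOpenAIRELogs assembles for one repository and one day looks like the line below (the site id and token are illustrative); whenever a page of 1000 visits comes back non-empty, the loop appends filter_offset in steps of 1000 until the API returns an empty array:

https://<matomo_BaseUrl>/?module=API&method=Live.getLastVisitsDetails&idSite=13&period=day&date=2016-03-01&format=json&expanded=5&filter_limit=1000&token_auth=<tokenAuth>&filter_offset=1000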
File diff suppressed because it is too large
@ -0,0 +1,56 @@
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package eu.dnetlib.usagestats.export;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;

import org.json.JSONException;
import org.json.simple.JSONArray;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

/**
 * @author dpie
 */
public class ReadCounterRobotsList {

    private ArrayList<String> robotsPatterns = new ArrayList<String>();
    private String COUNTER_ROBOTS_URL;

    public ReadCounterRobotsList(String url) throws IOException, JSONException, ParseException {
        COUNTER_ROBOTS_URL = url;
        robotsPatterns = readRobotsPartners(COUNTER_ROBOTS_URL);
    }

    private ArrayList<String> readRobotsPartners(String url) throws MalformedURLException, IOException, ParseException {
        InputStream is = new URL(url).openStream();
        JSONParser parser = new JSONParser();
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, Charset.forName("ISO-8859-1")));
        JSONArray jsonArray = (JSONArray) parser.parse(reader);
        for (Object aJsonArray : jsonArray) {
            org.json.simple.JSONObject jsonObjectRow = (org.json.simple.JSONObject) aJsonArray;
            // double the backslashes, presumably so the regex patterns survive later string embedding
            robotsPatterns.add(jsonObjectRow.get("pattern").toString().replace("\\", "\\\\"));
        }
        return robotsPatterns;
    }

    public ArrayList<String> getRobotsPatterns() {
        return robotsPatterns;
    }
}
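The list fetched from COUNTER_robots_Url is expected to be a JSON array of objects carrying a "pattern" field with a user-agent regular expression, roughly like the following (illustrative entries):

[{"pattern": "bot"}, {"pattern": "spider"}, {"pattern": "crawl"}]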
@ -0,0 +1,255 @@
package eu.dnetlib.usagestats.export;

import java.io.*;
// import java.io.BufferedReader;
// import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Calendar;

import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

/**
 * Harvests article download statistics from SARC-OJS journals (SUSHI-Lite) and loads them into the stats database.
 * Created by dpie
 */
public class SarcStats {

    private Statement stmt = null;

    private final Logger log = Logger.getLogger(this.getClass());

    public SarcStats() throws Exception {
        createTables();
    }

    private void createTables() throws Exception {
        try {
            stmt = ConnectDB.getConnection().createStatement();
            String sqlCreateTableSushiLog = "CREATE TABLE IF NOT EXISTS sushilog(source TEXT, repository TEXT, rid TEXT, date TEXT, metric_type TEXT, count INT, PRIMARY KEY(source, repository, rid, date, metric_type));";
            stmt.executeUpdate(sqlCreateTableSushiLog);

            // String sqlCopyPublicSushiLog="INSERT INTO sushilog SELECT * FROM public.sushilog;";
            // stmt.executeUpdate(sqlCopyPublicSushiLog);
            String sqlcreateRuleSushiLog = "CREATE OR REPLACE RULE ignore_duplicate_inserts AS "
                + " ON INSERT TO sushilog "
                + " WHERE (EXISTS ( SELECT sushilog.source, sushilog.repository,"
                + "sushilog.rid, sushilog.date "
                + "FROM sushilog "
                + "WHERE sushilog.source = new.source AND sushilog.repository = new.repository AND sushilog.rid = new.rid AND sushilog.date = new.date AND sushilog.metric_type = new.metric_type)) DO INSTEAD NOTHING;";
            stmt.executeUpdate(sqlcreateRuleSushiLog);
            String createSushiIndex = "create index if not exists sushilog_duplicates on sushilog(source, repository, rid, date, metric_type);";
            stmt.executeUpdate(createSushiIndex);

            stmt.close();
            ConnectDB.getConnection().close();
            log.info("Sushi Tables Created");
        } catch (Exception e) {
            log.error("Failed to create tables: " + e);
            throw new Exception("Failed to create tables: " + e.toString(), e);
        }
    }

    public void processSarc() throws Exception {
        processARReport("https://revistas.rcaap.pt/motricidade/sushiLite/v1_7/", "1646-107X");
        processARReport("https://revistas.rcaap.pt/antropologicas/sushiLite/v1_7/", "0873-819X");
        processARReport("https://revistas.rcaap.pt/interaccoes/sushiLite/v1_7/", "1646-2335");
        processARReport("https://revistas.rcaap.pt/cct/sushiLite/v1_7/", "2182-3030");
        processARReport("https://actapediatrica.spp.pt/sushiLite/v1_7/", "0873-9781");
        processARReport("https://revistas.rcaap.pt/sociologiapp/sushiLite/v1_7/", "0873-6529");
        processARReport("https://revistas.rcaap.pt/finisterra/sushiLite/v1_7/", "0430-5027");
        processARReport("https://revistas.rcaap.pt/sisyphus/sushiLite/v1_7/", "2182-8474");
        processARReport("https://revistas.rcaap.pt/anestesiologia/sushiLite/v1_7/", "0871-6099");
        processARReport("https://revistas.rcaap.pt/rpe/sushiLite/v1_7/", "0871-9187");
        processARReport("https://revistas.rcaap.pt/psilogos/sushiLite/v1_7/", "1646-091X");
        processARReport("https://revistas.rcaap.pt/juridica/sushiLite/v1_7/", "2183-5799");
        processARReport("https://revistas.rcaap.pt/ecr/sushiLite/v1_7/", "1647-2098");
        processARReport("https://revistas.rcaap.pt/nascercrescer/sushiLite/v1_7/", "0872-0754");
        processARReport("https://revistas.rcaap.pt/cea/sushiLite/v1_7/", "1645-3794");
        processARReport("https://revistas.rcaap.pt/proelium/sushiLite/v1_7/", "1645-8826");
        processARReport("https://revistas.rcaap.pt/millenium/sushiLite/v1_7/", "0873-3015");
    }

    public void sarcStats() throws Exception {
        stmt = ConnectDB.getConnection().createStatement();
        ConnectDB.getConnection().setAutoCommit(false);

        // earlier variant of this query, kept for reference:
        // String sql = "SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' INTO downloads_stats FROM sushilog s, datasource_oids d, result_oids ro WHERE s.repository=d.orid AND s.rid=ro.orid AND metric_type='ft_total'";
        String sql = "INSERT INTO downloads_stats SELECT s.source, d.id AS repository_id, ro.id as result_id, extract('year' from s.date::date) ||'/'|| LPAD(CAST(extract('month' from s.date::date) AS VARCHAR), 2, '0') as date, s.count, '0' FROM sushilog s, public.datasource_oids d, public.datasource_results dr, public.result_pids ro WHERE d.orid LIKE '%' || s.repository || '%' AND dr.id=d.id AND dr.result=ro.id AND s.rid=ro.pid AND ro.type='doi' AND metric_type='ft_total' AND s.source='SARC-OJS';";
        stmt.executeUpdate(sql);

        stmt.close();
        ConnectDB.getConnection().commit();
        ConnectDB.getConnection().close();
    }

    public void processARReport(String url, String issn) throws Exception {
        log.info("Processing SARC! issn: " + issn + " with url: " + url);
        ConnectDB.getConnection().setAutoCommit(false);

        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");

        Calendar start = Calendar.getInstance();
        start.set(Calendar.YEAR, 2016);
        start.set(Calendar.MONTH, Calendar.JANUARY);
        // start.setTime(simpleDateFormat.parse("2016-01"));

        Calendar end = Calendar.getInstance();
        end.add(Calendar.DAY_OF_MONTH, -1);

        // resume from the latest month already stored for this journal
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        PreparedStatement st = ConnectDB
            .getConnection()
            .prepareStatement("SELECT max(date) FROM sushilog WHERE repository=?;");
        st.setString(1, issn);
        ResultSet rs_date = st.executeQuery();
        while (rs_date.next()) {
            if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
                && !rs_date.getString(1).equals("")) {
                start.setTime(sdf.parse(rs_date.getString(1)));
            }
        }
        rs_date.close();

        PreparedStatement preparedStatement = ConnectDB
            .getConnection()
            .prepareStatement(
                "INSERT INTO sushilog (source, repository, rid, date, metric_type, count) VALUES (?,?,?,?,?,?)");
        int batch_size = 0;

        while (start.before(end)) {
            // String reportUrl = "http://irus.mimas.ac.uk/api/sushilite/v1_7/GetReport/?Report=IR1&Release=4&RequestorID=OpenAIRE&BeginDate="
            //     + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime())
            //     + "&RepositoryIdentifier=opendoar%3A" + opendoar + "&ItemIdentifier=&ItemDataType=&hasDOI=&Granularity=Monthly&Callback=";
            String reportUrl = url + "GetReport/?Report=AR1&Format=json&BeginDate="
                + simpleDateFormat.format(start.getTime()) + "&EndDate=" + simpleDateFormat.format(start.getTime());
            // System.out.println(reportUrl);
            start.add(Calendar.MONTH, 1);

            String text = getJson(reportUrl);
            if (text == null) {
                continue;
            }

            /*
             * PrintWriter wr = new PrintWriter(new FileWriter("logs/" + simpleDateFormat.format(start.getTime()) +
             * ".json")); wr.print(text); wr.close();
             */
            JSONParser parser = new JSONParser();
            JSONObject jsonObject = (JSONObject) parser.parse(text);
            jsonObject = (JSONObject) jsonObject.get("sc:ReportResponse");
            jsonObject = (JSONObject) jsonObject.get("sc:Report");
            if (jsonObject == null) {
                continue;
            }
            jsonObject = (JSONObject) jsonObject.get("c:Report");
            jsonObject = (JSONObject) jsonObject.get("c:Customer");
            Object obj = jsonObject.get("c:ReportItems");
            // a single report item arrives as a bare object, several as an array; normalize to an array
            JSONArray jsonArray = new JSONArray();
            if (obj instanceof JSONObject) {
                jsonArray.add(obj);
            } else {
                jsonArray = (JSONArray) obj;
                // jsonArray = (JSONArray) jsonObject.get("c:ReportItems");
            }
            if (jsonArray == null) {
                continue;
            }

            String rid = "";
            for (Object aJsonArray : jsonArray) {
                JSONObject jsonObjectRow = (JSONObject) aJsonArray;
                JSONArray itemIdentifier = new JSONArray();
                obj = jsonObjectRow.get("c:ItemIdentifier");
                if (obj instanceof JSONObject) {
                    itemIdentifier.add(obj);
                } else {
                    // JSONArray itemIdentifier = (JSONArray) jsonObjectRow.get("c:ItemIdentifier");
                    itemIdentifier = (JSONArray) obj;
                }
                for (Object identifier : itemIdentifier) {
                    JSONObject doi = (JSONObject) identifier;
                    if (doi.get("c:Type").toString().equals("DOI")) {
                        rid = doi.get("c:Value").toString();
                        // System.out.println("DOI: " + rid);
                        break;
                    }
                }
                if (rid.isEmpty()) {
                    continue;
                }

                JSONObject itemPerformance = (JSONObject) jsonObjectRow.get("c:ItemPerformance");
                // for (Object perf : itemPerformance) {
                JSONObject performance = (JSONObject) itemPerformance;
                JSONObject periodObj = (JSONObject) performance.get("c:Period");
                String period = periodObj.get("c:Begin").toString();
                JSONObject instanceObj = (JSONObject) performance.get("c:Instance");
                String type = instanceObj.get("c:MetricType").toString();
                String count = instanceObj.get("c:Count").toString();
                // System.out.println(rid + " : " + period + " : " + count);

                preparedStatement.setString(1, "SARC-OJS");
                preparedStatement.setString(2, issn);
                // preparedStatement.setString(2, url);
                preparedStatement.setString(3, rid);
                preparedStatement.setString(4, period);
                preparedStatement.setString(5, type);
                preparedStatement.setInt(6, Integer.parseInt(count));
                preparedStatement.addBatch();
                batch_size++;
                if (batch_size == 10000) {
                    preparedStatement.executeBatch();
                    ConnectDB.getConnection().commit();
                    batch_size = 0;
                }
                // }

                // break;
            }
            // break;
        }

        preparedStatement.executeBatch();
        ConnectDB.getConnection().commit();
        ConnectDB.getConnection().close();
    }

    private String getJson(String url) {
        // String cred=username+":"+password;
        // String encoded = new sun.misc.BASE64Encoder().encode (cred.getBytes());
        try {
            URL website = new URL(url);
            URLConnection connection = website.openConnection();
            // connection.setRequestProperty ("Authorization", "Basic "+encoded);
            StringBuilder response;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                response = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                    response.append("\n");
                }
            }
            return response.toString();
        } catch (Exception e) {
            log.error("Failed to get URL: " + e);
            // System.out.println("Failed to get URL: " + e);
            return null;
            // throw new Exception("Failed to get URL: " + e.toString(), e);
        }
    }
}
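The SARC-OJS endpoints serve the same SUSHI structure as IRUS-UK but with namespace-prefixed keys, and a single report item or identifier may arrive as a bare object instead of an array, which is why processARReport normalizes c:ReportItems and c:ItemIdentifier before iterating. A hand-written sketch follows (keys are the ones the code dereferences, values illustrative):

{"sc:ReportResponse": {"sc:Report": {"c:Report": {"c:Customer": {"c:ReportItems":
  {"c:ItemIdentifier": {"c:Type": "DOI", "c:Value": "10.1234/example"},
   "c:ItemPerformance": {"c:Period": {"c:Begin": "2016-01-01"},
                         "c:Instance": {"c:MetricType": "ft_total", "c:Count": "7"}}}}}}}}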
@ -0,0 +1,57 @@
package eu.dnetlib.usagestats.export;

import java.util.Properties;

import org.apache.log4j.Logger;

public class UsageStatsExporter {

    private Logger log = Logger.getLogger(this.getClass());
    private Properties properties;

    public UsageStatsExporter(Properties properties) {
        this.properties = properties;
    }

    public void export() throws Exception {

        // read workflow parameters
        String matomoAuthToken = properties.getProperty("matomo_AuthToken");
        String matomoBaseURL = properties.getProperty("matomo_BaseUrl");
        String repoLogPath = properties.getProperty("repo_LogPath");
        String portalLogPath = properties.getProperty("portal_LogPath");
        String portalMatomoID = properties.getProperty("portal_MatomoID");
        String irusUKBaseURL = properties.getProperty("IRUS_UK_BaseUrl");

        // connect to DB
        ConnectDB.init(properties);

        // download the Matomo/Piwik logs to HDFS
        PiwikDownloadLogs piwd = new PiwikDownloadLogs(matomoBaseURL, matomoAuthToken);
        piwd.GetOpenAIRELogs(repoLogPath, portalLogPath, portalMatomoID);

        /*
         * Create DB tables, insert/update statistics
         */
        PiwikStatsDB piwikstatsdb = new PiwikStatsDB(repoLogPath, portalLogPath);
        piwikstatsdb.setCounterRobotsURL(properties.getProperty("COUNTER_robots_Url"));
        piwikstatsdb.processLogs();
        log.info("process logs done");

        IrusStats irusstats = new IrusStats(irusUKBaseURL);
        irusstats.processIrusRRReport();
        irusstats.irusStats();
        log.info("irus done");

        SarcStats sarcStats = new SarcStats();
        sarcStats.processSarc();
        sarcStats.sarcStats();
        log.info("sarc done");

        // finalize usagestats
        piwikstatsdb.finalizeStats();
        log.info("finalized stats");
    }
}
@ -0,0 +1,43 @@
<html>
<head>
<title>Revision 58415: /dnet45/modules/dnet-openaire-usage-stats-export-wf/trunk/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export</title>
</head>
<body>
<h2>Revision 58415: /dnet45/modules/dnet-openaire-usage-stats-export-wf/trunk/dnet-openaire-usage-stats-export/src/main/java/eu/dnetlib/usagestats/export</h2>
<ul>
<li><a href="../">..</a></li>
<li><a href="ConnectDB.java">ConnectDB.java</a></li>
<li><a href="ExecuteWorkflow.java">ExecuteWorkflow.java</a></li>
<li><a href="IrusStats.java">IrusStats.java</a></li>
<li><a href="PiwikDownloadLogs.java">PiwikDownloadLogs.java</a></li>
<li><a href="PiwikStatsDB.java">PiwikStatsDB.java</a></li>
<li><a href="ReadCounterRobotsList.java">ReadCounterRobotsList.java</a></li>
<li><a href="SarcStats.java">SarcStats.java</a></li>
<li><a href="UsageStatsExporter.java">UsageStatsExporter.java</a></li>
</ul>
<hr noshade>
<em>Powered by <a href="http://subversion.tigris.org/">Subversion</a> version 1.4.4 (r25188).</em>
</body>
</html>
@ -0,0 +1,30 @@
<configuration>
    <property>
        <name>jobTracker</name>
        <value>${jobTracker}</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>${nameNode}</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>oozie.wf.workflow.notification.url</name>
        <value>{serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status</value>
    </property>
</configuration>
@ -0,0 +1,76 @@
<workflow-app name="graph_stats" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>stats_db_name</name>
            <description>the target stats database name</description>
        </property>
        <property>
            <name>openaire_db_name</name>
            <description>the original graph database name</description>
        </property>
        <property>
            <name>external_stats_db_name</name>
            <value>stats_ext</value>
            <description>the external stats that should be added since they are not included in the graph database</description>
        </property>
        <property>
            <name>hiveMetastoreUris</name>
            <description>hive server metastore URIs</description>
        </property>
        <property>
            <name>hiveJdbcUrl</name>
            <description>hive server jdbc url</description>
        </property>
    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>hive.metastore.uris</name>
                <value>${hiveMetastoreUris}</value>
            </property>
        </configuration>
    </global>

    <start to="Step1"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name='Step1'>
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>eu.dnetlib.usagestats.export.ExecuteWorkflow</main-class>
            <arg>${outputFileName}</arg>
            <capture-output/>
        </java>
        <ok to="End" />
        <error to="Kill" />
    </action>

    <action name="StepX">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <jdbc-url>${hiveJdbcUrl}</jdbc-url>
            <script>scripts/step1.sql</script>
            <param>stats_db_name=${stats_db_name}</param>
            <param>openaire_db_name=${openaire_db_name}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
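Assuming a standard Oozie deployment (the host and paths below are placeholders, not taken from this commit), the workflow directory would be copied to HDFS and submitted with the stock Oozie CLI; job.properties must set oozie.wf.application.path to the HDFS directory holding this workflow.xml.

hdfs dfs -put usage-stats-update /user/oozie/apps/usage-stats-update
oozie job -oozie http://oozie-host:11000/oozie -config job.properties -run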
@ -26,6 +26,7 @@
        <module>dhp-dedup-scholexplorer</module>
        <module>dhp-graph-provision-scholexplorer</module>
        <module>dhp-stats-update</module>
        <module>dhp-usage-stats-update</module>
        <module>dhp-broker-events</module>
    </modules>