The workflow starts

Spyros Zoupanos 2020-05-12 20:38:31 +03:00
parent 0b6f302652
commit 98ba2d0282
12 changed files with 59 additions and 183 deletions

View File

@@ -10,6 +10,15 @@
<groupId>eu.dnetlib</groupId>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-stats-update</artifactId>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<cdh.hive.version>0.13.1-cdh5.2.1</cdh.hive.version>
<cdh.hadoop.version>2.5.0-cdh5.2.1</cdh.hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
@@ -32,26 +41,17 @@
<version>20180130</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>[9.1-901.jdbc4,9.5)</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<version>2.1.11</version>
<configuration>
<failOnNoGitDirectory>false</failOnNoGitDirectory>
</configuration>
</plugin>
</plugins>
</build>
<!-- <properties>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties> -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${cdh.hive.version}</version>
<!-- <version>3.1.2</version> -->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${cdh.hadoop.version}</version>
</dependency>
</dependencies>
</project>
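The hive-jdbc and hadoop-common dependencies declared above (pinned to the cdh.hive.version and cdh.hadoop.version properties) are what the export code needs to talk to HiveServer2/Impala over JDBC. A minimal sketch of that usage, assuming a reachable HiveServer2 endpoint; the URL, database and query below are placeholders, not values taken from this commit:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveJdbcSketch {
    public static void main(String[] args) throws Exception {
        // Register the Hive JDBC driver pulled in through the hive-jdbc dependency.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // Placeholder endpoint; the real exporter reads its connection details from configuration.
        String connectionUrl = "jdbc:hive2://example-hiveserver2:10000/stats_db";
        try (Connection con = DriverManager.getConnection(connectionUrl);
                Statement stmt = con.createStatement();
                ResultSet rs = stmt.executeQuery("SELECT count(*) FROM some_table")) {
            while (rs.next()) {
                // The example query returns a single numeric column.
                System.out.println(rs.getLong(1));
            }
        }
    }
}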

View File

@@ -1,60 +0,0 @@
package eu.dnetlib.oa.graph.usagestats.export;
public class ClouderaImpalaJdbcExample {
public static void main(String[] args) {
System.out.println("\n=============================================");
//// loadConfiguration();
//
//// String sqlStatement = args[0];
// String sqlStatement = "select count(*) from fundref;";
// connectionUrl = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/stats_wf_db_galexiou_oozie_beta;UID=spyros;PWD=RU78N9sqQndnH3SQ;UseNativeQuery=1";
//// connectionUrl = "jdbc:hive2://172.16.10.74:21050/emay_data;auth=noSasl";
//// "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000"
//// url=jdbc:impala://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/stats_wf_db_galexiou_oozie_beta;UID=antonis.lempesis;PWD=XXXXX;UseNativeQuery=1
// jdbcDriverName = "org.apache.hive.jdbc.HiveDriver";
//
// System.out.println("\n=============================================");
// System.out.println("Cloudera Impala JDBC Example");
// System.out.println("Using Connection URL: " + connectionUrl);
// System.out.println("USing JDBC Driver " + jdbcDriverName);
// System.out.println("Running Query: " + sqlStatement);
//
// Connection con = null;
//
// try {
//
// Class.forName(jdbcDriverName);
//
// con = DriverManager.getConnection(connectionUrl);
//
// Statement stmt = con.createStatement();
//
// ResultSet rs = stmt.executeQuery(sqlStatement);
//
// System.out.println("\n== Begin Query Results ======================");
//
// // print the results to the console
// while (rs.next()) {
// // the example query returns one String column
// System.out.println(rs.getString(1));
// }
//
// System.out.println("== End Query Results =======================\n\n");
//
// } catch (SQLException e) {
// e.printStackTrace();
// } catch (Exception e) {
// e.printStackTrace();
// } finally {
// try {
// con.close();
// } catch (Exception e) {
// // swallow
// }
// }
}
}

View File

@@ -4,7 +4,7 @@
* and open the template in the editor.
*/
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
/*
@author dpie

View File

@@ -4,25 +4,10 @@
* and open the template in the editor.
*/
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author dpie

View File

@@ -1,5 +1,5 @@
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
/**
* @author dpie

View File

@@ -1,5 +1,5 @@
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
import java.io.*;
import java.net.URL;

View File

@@ -1,5 +1,5 @@
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
import java.io.*;
import java.net.URLDecoder;

View File

@@ -4,7 +4,7 @@
* and open the template in the editor.
*/
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
/**
* @author dpie

View File

@@ -1,5 +1,5 @@
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
import java.io.*;
// import java.io.BufferedReader;

View File

@@ -1,7 +1,6 @@
package eu.dnetlib.usagestats.export;
package eu.dnetlib.oa.graph.usagestats.export;
import java.io.InputStream;
import java.util.Properties;
import org.apache.log4j.Logger;

View File

@@ -1,18 +1,5 @@
<workflow-app name="graph_stats" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="Usage Graph Stats" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>stats_db_name</name>
<description>the target stats database name</description>
</property>
<property>
<name>openaire_db_name</name>
<description>the original graph database name</description>
</property>
<property>
<name>external_stats_db_name</name>
<value>stats_ext</value>
<description>the external stats that should be added since they are not included in the graph database</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
@@ -42,78 +29,19 @@
</configuration>
</global>
<start to="Step11"/>
<start to="Step1"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name='Step11'>
<java>
<prepare>
<delete path="${jobOutput}"/>
</prepare>
<configuration>
<property>
<name>mapred.queue.name</name>
<value>default</value>
</property>
</configuration>
<main-class>eu.dnetlib.oa.graph.usagestats.export.ClouderaImpalaJdbcExample</main-class>
<capture-output/>
</java>
<ok to="End" />
<error to="fail" />
</action>
<action name="ImportDB_claims">
<java>
<prepare>
<delete path="${contentPath}/db_claims"/>
</prepare>
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
<arg>--hdfsPath</arg><arg>${contentPath}/db_claims</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--action</arg><arg>claims</arg>
</java>
<ok to="ImportODF_claims"/>
<error to="Kill"/>
</action>
<action name='Step1'>
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
</configuration>
<main-class>eu.dnetlib.oa.graph.usagestats.export.UsageStatsExporter</main-class>
<main-class>org.apache.oozie.test.MyTest</main-class>
<arg>${outputFileName}</arg>
<capture-output/>
<main-class>eu.dnetlib.oa.graph.usagestats.export.ExecuteWorkflow</main-class>
</java>
<ok to="End" />
<error to="fail" />
<error to="Kill" />
</action>
<action name="StepX">
<hive2 xmlns="uri:oozie:hive2-action:0.1">
<jdbc-url>${hiveJdbcUrl}</jdbc-url>
<script>scripts/step1.sql</script>
<param>stats_db_name=${stats_db_name}</param>
<param>openaire_db_name=${openaire_db_name}</param>
</hive2>
<ok to="Step2_1"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>
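The rewritten workflow now starts at Step1, whose java action launches eu.dnetlib.oa.graph.usagestats.export.ExecuteWorkflow. An Oozie java action simply runs the configured main-class and passes the action's <arg> values to its String[] parameter; the class body is not part of this diff, so the skeleton below is only a hypothetical illustration of that contract:

public class ExecuteWorkflowSketch {
    // Hypothetical stand-in for the ExecuteWorkflow main-class named in the Step1 action.
    public static void main(String[] args) throws Exception {
        // Oozie hands over the action's <arg> elements here (this version of the action defines none).
        for (String arg : args) {
            System.out.println("workflow argument: " + arg);
        }
        // From this point the real class is expected to drive the usage-stats export steps.
    }
}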

View File

@@ -0,0 +1,24 @@
#logging.config=log4j.properties
db.driverClassName=org.postgresql.Driver
#spring.datasource.url=jdbc:postgresql://88.197.53.70:5432/stats
#spring.datasource.url=jdbc:postgresql://statsdb-beta.openaire.eu:5432/stats
db.Url=jdbc:postgresql://88.197.53.70:5432/stats
#db.Url=jdbc:postgresql://statsdb-prod.openaire.eu/stats
db.username=sqoop
db.password=sqoop
db.defaultschema=usagestats
matomo.AuthToken=703bd17d845acdaf795e01bb1e0895b9
matomo.BaseUrl=analytics.openaire.eu
#repo.LogPath=/Users/dpie/Desktop/Repologs/
repo.LogPath=/user/spyros/logs/usage_stats_logs/Repologs
portal.LogPath=/user/spyros/logs/usage_stats_logs/Portallogs/
portal.MatomoID=109
COUNTER.robots.Url=https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json
IRUS_UK.BaseUrl=https://irus.jisc.ac.uk/api/sushilite/v1_7/
#server.compression.enabled=true
#compression.max_number_of_records=1000
#usagestats.redis.hostname=localhost
#usagestats.redis.port=6379
#spring.jackson.serialization.INDENT_OUTPUT=true
#download.folder=/Users/dpie/DownloadSushiLite
#sushi-lite.server=http://localhost:8080
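
The new properties file groups the exporter's settings: the PostgreSQL stats database (db.*), the Matomo analytics endpoint and token (matomo.*), HDFS log paths (repo.LogPath, portal.LogPath), and the COUNTER-Robots and IRUS-UK source URLs. A minimal sketch of reading such a file with java.util.Properties; the class and resource names are illustrative assumptions, since the exporter's actual loading code is not shown in this diff:

import java.io.InputStream;
import java.util.Properties;

public class UsageStatsConfigSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Assumed resource name; the diff does not show where the file is placed on the classpath.
        try (InputStream in = UsageStatsConfigSketch.class
                .getClassLoader().getResourceAsStream("usagestats.properties")) {
            if (in == null) {
                throw new IllegalStateException("usagestats.properties not found on the classpath");
            }
            props.load(in);
        }
        // Keys taken from the file above.
        String dbUrl = props.getProperty("db.Url");
        String matomoBaseUrl = props.getProperty("matomo.BaseUrl");
        String repoLogPath = props.getProperty("repo.LogPath");
        System.out.println(dbUrl + " | " + matomoBaseUrl + " | " + repoLogPath);
    }
}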