forked from D-Net/dnet-hadoop
The workflow starts
This commit is contained in:
parent
0b6f302652
commit
98ba2d0282
|
@ -10,6 +10,15 @@
|
|||
<groupId>eu.dnetlib</groupId>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>dhp-usage-stats-update</artifactId>
|
||||
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||
<cdh.hive.version>0.13.1-cdh5.2.1</cdh.hive.version>
|
||||
<cdh.hadoop.version>2.5.0-cdh5.2.1</cdh.hadoop.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
@ -32,26 +41,17 @@
|
|||
<version>20180130</version>
|
||||
<type>jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
<version>[9.1-901.jdbc4,9.5)</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>pl.project13.maven</groupId>
|
||||
<artifactId>git-commit-id-plugin</artifactId>
|
||||
<version>2.1.11</version>
|
||||
<configuration>
|
||||
<failOnNoGitDirectory>false</failOnNoGitDirectory>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<!-- <properties>
|
||||
<maven.compiler.source>1.7</maven.compiler.source>
|
||||
<maven.compiler.target>1.7</maven.compiler.target>
|
||||
</properties> -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hive</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${cdh.hive.version}</version>
|
||||
<!-- <version>3.1.2</version> -->
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<version>${cdh.hadoop.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
|
|
@ -1,60 +0,0 @@
|
|||
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
/**
 * Minimal stand-alone entry point that prints a separator banner to standard output.
 *
 * <p>This class previously carried a commented-out Hive/Impala JDBC sample. That dead code
 * was removed because it was never executed and embedded a connection URL containing a
 * user name and password in clear text. Connection settings must not be kept in source.
 */
public class ClouderaImpalaJdbcExample {

	/** Banner written by {@link #main(String[])}: a leading newline followed by a rule of '=' characters. */
	private static final String BANNER = "\n=============================================";

	/**
	 * Prints the separator banner followed by a line terminator.
	 *
	 * @param args command-line arguments; they are not read
	 */
	public static void main(String[] args) {
		System.out.println(BANNER);
	}
}
|
|
@ -4,7 +4,7 @@
|
|||
* and open the template in the editor.
|
||||
*/
|
||||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
/*
|
||||
@author dpie
|
||||
|
|
|
@ -4,25 +4,10 @@
|
|||
* and open the template in the editor.
|
||||
*/
|
||||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.Properties;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* @author dpie
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
/**
|
||||
* @author dpie
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URLDecoder;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* and open the template in the editor.
|
||||
*/
|
||||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
/**
|
||||
* @author dpie
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
import java.io.*;
|
||||
// import java.io.BufferedReader;
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
|
||||
package eu.dnetlib.usagestats.export;
|
||||
package eu.dnetlib.oa.graph.usagestats.export;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
|
@ -1,18 +1,5 @@
|
|||
<workflow-app name="graph_stats" xmlns="uri:oozie:workflow:0.5">
|
||||
<workflow-app name="Usage Graph Stats" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>stats_db_name</name>
|
||||
<description>the target stats database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>openaire_db_name</name>
|
||||
<description>the original graph database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>external_stats_db_name</name>
|
||||
<value>stats_ext</value>
|
||||
<description>the external stats that should be added since they are not included in the graph database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<description>hive server metastore URIs</description>
|
||||
|
@ -42,78 +29,19 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="Step11"/>
|
||||
<start to="Step1"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name='Step11'>
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${jobOutput}"/>
|
||||
</prepare>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapred.queue.name</name>
|
||||
<value>default</value>
|
||||
</property>
|
||||
</configuration>
|
||||
<main-class>eu.dnetlib.oa.graph.usagestats.export.ClouderaImpalaJdbcExample</main-class>
|
||||
<capture-output/>
|
||||
</java>
|
||||
<ok to="End" />
|
||||
<error to="fail" />
|
||||
</action>
|
||||
|
||||
|
||||
<action name="ImportDB_claims">
|
||||
<java>
|
||||
<prepare>
|
||||
<delete path="${contentPath}/db_claims"/>
|
||||
</prepare>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateDbEntitiesApplication</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${contentPath}/db_claims</arg>
|
||||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||
<arg>--action</arg><arg>claims</arg>
|
||||
</java>
|
||||
<ok to="ImportODF_claims"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name='Step1'>
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapred.job.queue.name</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
<main-class>eu.dnetlib.oa.graph.usagestats.export.UsageStatsExporter</main-class>
|
||||
<main-class>org.apache.oozie.test.MyTest</main-class>
|
||||
<arg>${outputFileName}</arg>
|
||||
<capture-output/>
|
||||
<main-class>eu.dnetlib.oa.graph.usagestats.export.ExecuteWorkflow</main-class>
|
||||
</java>
|
||||
<ok to="End" />
|
||||
<error to="fail" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<action name="StepX">
|
||||
<hive2 xmlns="uri:oozie:hive2-action:0.1">
|
||||
<jdbc-url>${hiveJdbcUrl}</jdbc-url>
|
||||
<script>scripts/step1.sql</script>
|
||||
<param>stats_db_name=${stats_db_name}</param>
|
||||
<param>openaire_db_name=${openaire_db_name}</param>
|
||||
</hive2>
|
||||
<ok to="Step2_1"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
# Settings for the usage-stats export: database connection (db.*), Matomo access (matomo.*),
# log locations (repo.LogPath, portal.LogPath) and external service URLs.
# Entries prefixed with '#' are disabled; presumably alternatives kept for reference - TODO confirm.
#logging.config=log4j.properties
|
||||
db.driverClassName=org.postgresql.Driver
|
||||
#spring.datasource.url=jdbc:postgresql://88.197.53.70:5432/stats
|
||||
#spring.datasource.url=jdbc:postgresql://statsdb-beta.openaire.eu:5432/stats
|
||||
db.Url=jdbc:postgresql://88.197.53.70:5432/stats
|
||||
#db.Url=jdbc:postgresql://statsdb-prod.openaire.eu/stats
|
||||
db.username=sqoop
|
||||
# NOTE(review): database password is stored here in plain text - confirm whether it should be supplied from outside the repository.
db.password=sqoop
|
||||
db.defaultschema=usagestats
|
||||
# NOTE(review): Matomo auth token is stored here in plain text - confirm whether it should be supplied from outside the repository.
matomo.AuthToken=703bd17d845acdaf795e01bb1e0895b9
|
||||
matomo.BaseUrl=analytics.openaire.eu
|
||||
#repo.LogPath=/Users/dpie/Desktop/Repologs/
|
||||
# NOTE(review): repo.LogPath has no trailing slash while portal.LogPath does - verify the consumer handles both forms.
repo.LogPath=/user/spyros/logs/usage_stats_logs/Repologs
|
||||
portal.LogPath=/user/spyros/logs/usage_stats_logs/Portallogs/
|
||||
portal.MatomoID=109
|
||||
COUNTER.robots.Url=https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json
|
||||
IRUS_UK.BaseUrl=https://irus.jisc.ac.uk/api/sushilite/v1_7/
|
||||
#server.compression.enabled=true
|
||||
#compression.max_number_of_records=1000
|
||||
#usagestats.redis.hostname=localhost
|
||||
#usagestats.redis.port=6379
|
||||
#spring.jackson.serialization.INDENT_OUTPUT=true
|
||||
#download.folder=/Users/dpie/DownloadSushiLite
|
||||
#sushi-lite.server=http://localhost:8080
|
Loading…
Reference in New Issue