forked from D-Net/dnet-hadoop
Getting the right piwik_ids from (graph) stats db
This commit is contained in:
parent
d770d7043d
commit
637e61bb0f
|
@ -44,7 +44,7 @@ public abstract class ConnectDB {
|
|||
|
||||
dbURL = "jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1";
|
||||
usageStatsDBSchema = "usagestats";
|
||||
statsDBSchema = "stats_wf_db_galexiou_oozie_beta";
|
||||
statsDBSchema = "openaire_prod_stats_shadow_20200821";
|
||||
|
||||
Class.forName("org.apache.hive.jdbc.HiveDriver");
|
||||
}
|
||||
|
|
|
@ -85,19 +85,12 @@ public class PiwikDownloadLogs {
|
|||
|
||||
Statement statement = ConnectDB.getConnection().createStatement();
|
||||
|
||||
// ResultSet rs = statement
|
||||
// .executeQuery(
|
||||
// "SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||
// + ".datasource where piwik_id is not null order by piwik_id");
|
||||
// while (rs.next()) {
|
||||
// int siteId = rs.getInt(1);
|
||||
|
||||
for (int manualSiteId : new int[] {
|
||||
13, 23
|
||||
// , 109 -> This seems to be empty
|
||||
}) {
|
||||
int siteId = manualSiteId; // Until the stats database (datasource table) provides a correct id, we set it
|
||||
// to 13
|
||||
ResultSet rs = statement
|
||||
.executeQuery(
|
||||
"SELECT distinct piwik_id from " + ConnectDB.getStatsDBSchema()
|
||||
+ ".datasource where piwik_id is not null and piwik_id <> 0 order by piwik_id");
|
||||
while (rs.next()) {
|
||||
int siteId = rs.getInt(1);
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
|
||||
|
||||
Calendar start = Calendar.getInstance();
|
||||
|
|
|
@ -40,7 +40,9 @@ public class PiwikStatsDB {
|
|||
this.logRepoPath = logRepoPath;
|
||||
this.logPortalPath = logPortalPath;
|
||||
this.createTables();
|
||||
this.createTmpTables();
|
||||
// The piwiklog table is not needed since it is built
|
||||
// on top of JSON files
|
||||
// this.createTmpTables();
|
||||
}
|
||||
|
||||
public void foo() {
|
||||
|
|
Loading…
Reference in New Issue