forked from D-Net/dnet-hadoop
Changes to make it compile successfully
parent af62b14f91
commit cabe92d155
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.model;
 
 import java.util.Map;
@@ -20,9 +21,11 @@ public class Event {
 
     private Map<String, Object> map;
 
-    public Event() {}
+    public Event() {
+    }
 
-    public Event(final String producerId, final String eventId, final String topic, final String payload, final Long creationDate, final Long expiryDate,
+    public Event(final String producerId, final String eventId, final String topic, final String payload,
+        final Long creationDate, final Long expiryDate,
         final boolean instantMessage,
         final Map<String, Object> map) {
         this.producerId = producerId;
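The Event.java hunk only reflows the constructor signature. For reference, a minimal sketch of how a call site reads after the change; every value below is hypothetical, only the parameter order is taken from the hunk:

    import java.util.HashMap;
    import java.util.Map;

    public class EventExample {

        public static void main(final String[] args) {
            // All values are placeholders; only the parameter order comes from the diff.
            final Map<String, Object> map = new HashMap<>();
            map.put("target_publication_title", "An example title");

            final long now = System.currentTimeMillis();
            final Event event = new Event(
                "producer-1",             // producerId
                "event-0001",             // eventId
                "ENRICH/MISSING/PROJECT", // topic
                "{}",                     // payload (JSON)
                now,                      // creationDate
                now + 86_400_000L,        // expiryDate (one day later)
                false,                    // instantMessage
                map);
        }
    }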
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.model;
 
 import java.text.ParseException;
@@ -38,8 +39,8 @@ public class EventFactory {
 
         final String payload = createPayload(target, updateInfo);
 
-        final String eventId =
-            calculateEventId(updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
+        final String eventId = calculateEventId(
+            updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
 
         res.setEventId(eventId);
         res.setProducerId(PRODUCER_ID);
@@ -61,7 +62,8 @@ public class EventFactory {
         return payload.toJSON();
     }
 
-    private static Map<String, Object> createMapFromResult(final Result oaf, final Result source, final UpdateInfo<?> updateInfo) {
+    private static Map<String, Object> createMapFromResult(final Result oaf, final Result source,
+        final UpdateInfo<?> updateInfo) {
         final Map<String, Object> map = new HashMap<>();
 
         final List<KeyValue> collectedFrom = oaf.getCollectedfrom();
@@ -87,12 +89,18 @@ public class EventFactory {
 
         final List<StructuredProperty> subjects = oaf.getSubject();
         if (subjects.size() > 0) {
-            map.put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
+            map
+                .put(
+                    "target_publication_subject_list",
+                    subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
         }
 
         final List<Author> authors = oaf.getAuthor();
         if (authors.size() > 0) {
-            map.put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList()));
+            map
+                .put(
+                    "target_publication_author_list",
+                    authors.stream().map(Author::getFullname).collect(Collectors.toList()));
         }
 
         // PROVENANCE INFO
@@ -119,7 +127,9 @@ public class EventFactory {
     }
 
     private static long parseDateTolong(final String date) {
-        if (StringUtils.isBlank(date)) { return -1; }
+        if (StringUtils.isBlank(date)) {
+            return -1;
+        }
         try {
             return DateUtils.parseDate(date, DATE_PATTERNS).getTime();
         } catch (final ParseException e) {
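The parseDateTolong change is pure brace style, but the method's contract is worth spelling out: a blank date yields the sentinel -1 instead of an exception. A self-contained sketch, assuming a DATE_PATTERNS value and a catch body that the hunk truncates:

    import java.text.ParseException;

    import org.apache.commons.lang3.StringUtils;
    import org.apache.commons.lang3.time.DateUtils;

    public class DateParsingSketch {

        // Assumed pattern list; the real DATE_PATTERNS constant is not shown in the hunk.
        private static final String[] DATE_PATTERNS = { "yyyy-MM-dd" };

        public static long parseDateTolong(final String date) {
            // Blank input maps to -1 so callers can treat the result as a sentinel.
            if (StringUtils.isBlank(date)) {
                return -1;
            }
            try {
                return DateUtils.parseDate(date, DATE_PATTERNS).getTime();
            } catch (final ParseException e) {
                return -1; // Assumption: the catch branch is cut off in the diff.
            }
        }

        public static void main(final String[] args) {
            System.out.println(parseDateTolong("2020-05-12")); // epoch millis
            System.out.println(parseDateTolong("   "));        // -1
        }
    }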
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa;
 
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@@ -40,8 +41,10 @@ public class GenerateEventsApplication {
 
     public static void main(final String[] args) throws Exception {
         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
-            IOUtils.toString(GenerateEventsApplication.class
-                .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
+            IOUtils
+                .toString(
+                    GenerateEventsApplication.class
+                        .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
         parser.parseArgument(args);
 
         final Boolean isSparkSessionManaged = Optional
@@ -78,9 +81,12 @@ public class GenerateEventsApplication {
         for (final Result source : children) {
             for (final Result target : children) {
                 if (source != target) {
-                    list.addAll(findUpdates(source, target).stream()
-                        .map(info -> EventFactory.newBrokerEvent(source, target, info))
-                        .collect(Collectors.toList()));
+                    list
+                        .addAll(
+                            findUpdates(source, target)
+                                .stream()
+                                .map(info -> EventFactory.newBrokerEvent(source, target, info))
+                                .collect(Collectors.toList()));
                 }
             }
         }
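The rewritten addAll call is the heart of the event generation: every ordered pair of distinct results in a group is checked for updates, so a group of n records triggers up to n * (n - 1) comparisons. A stripped-down sketch of that shape, with plain strings standing in for the Result objects:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class PairwiseSketch {

        public static void main(final String[] args) {
            final List<String> children = Arrays.asList("r1", "r2", "r3");
            final List<String> list = new ArrayList<>();

            // Same loop shape as in GenerateEventsApplication: compare each
            // ordered pair of distinct records and collect one entry per pair.
            // Note the original also uses a reference (!=) comparison.
            for (final String source : children) {
                for (final String target : children) {
                    if (source != target) {
                        list.add(source + " -> " + target);
                    }
                }
            }
            System.out.println(list); // 3 * 2 = 6 ordered pairs
        }
    }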
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;
@@ -25,7 +26,8 @@ public class EnrichMissingProject extends UpdateInfo<Project> {
 
     @Override
     public String getHighlightValueAsString() {
-        return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + getHighlightValue().getCode();
+        return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram()
+            + getHighlightValue().getCode();
     }
 
 }
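The EnrichMissingProject change only wraps the long return expression. The composed highlight identifier puts the "::" separator between funder and funding program, with the project code appended directly. A toy rendering with hypothetical values:

    public class HighlightValueSketch {

        public static void main(final String[] args) {
            // Hypothetical project fields, concatenated exactly as in
            // getHighlightValueAsString() above.
            final String funder = "EC";
            final String fundingProgram = "H2020";
            final String code = "123456";
            System.out.println(funder + "::" + fundingProgram + code); // EC::H2020123456
        }
    }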
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import java.util.Arrays;

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import eu.dnetlib.broker.objects.OpenAireEventPayload;
@@ -1,20 +1,41 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 
 
     <parent>
-        <artifactId>dhp-workflows</artifactId>
+        <artifactId>dhp-workflows</artifactId >
         <groupId>eu.dnetlib.dhp</groupId>
         <version>1.1.7-SNAPSHOT</version>
     </parent>
     <groupId>eu.dnetlib</groupId>
     <modelVersion>4.0.0</modelVersion>
     <artifactId>dhp-usage-stats-update</artifactId>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_2.11</artifactId>
+            <version>2.2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_2.11</artifactId>
+            <version>2.4.5</version>
+        </dependency>
+        <dependency>
+            <groupId>com.googlecode.json-simple</groupId>
+            <artifactId>json-simple</artifactId>
+            <version>1.1.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.json</groupId>
+            <artifactId>json</artifactId>
+            <version>20180130</version>
+            <type>jar</type>
+        </dependency>
+        <dependency>
+            <groupId>postgresql</groupId>
+            <artifactId>postgresql</artifactId>
+            <version>[9.1-901.jdbc4,9.5)</version>
+        </dependency>
+    </dependencies>
     <build>
@@ -29,4 +50,8 @@
             </plugin>
         </plugins>
     </build>
+    <properties>
+        <maven.compiler.source>1.7</maven.compiler.source>
+        <maven.compiler.target>1.7</maven.compiler.target>
+    </properties>
 </project>
@@ -9,7 +9,6 @@ package eu.dnetlib.usagestats.export;
 /*
  @author dpie
  */
-
 /*
  @author dpie
 */
@@ -23,7 +22,7 @@ import org.apache.log4j.Logger;
 
 public abstract class ConnectDB {
 
-    private static Connection DB_CONNECTION;
+    public static Connection DB_CONNECTION;
 
     private static String dbURL;
     private static String dbUsername;
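Widening DB_CONNECTION from private to public lets PiwikDownloadLogs prepare statements on the shared connection directly (see below). A minimal sketch of what ConnectDB plausibly looks like around that field; the JDBC URL and credentials are placeholders, and only the field and an accessor are implied by the diff:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;

    public abstract class ConnectDBSketch {

        // Made public in this commit so other classes can use it directly.
        public static Connection DB_CONNECTION;

        // Hypothetical lazy accessor; the real initialization is not in the hunk.
        public static Connection getConnection() throws SQLException {
            if (DB_CONNECTION == null || DB_CONNECTION.isClosed()) {
                DB_CONNECTION = DriverManager
                    .getConnection("jdbc:postgresql://localhost:5432/stats", "user", "password");
            }
            return DB_CONNECTION;
        }
    }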
@@ -2,10 +2,8 @@
 package eu.dnetlib.usagestats.export;
 
-/**
- *
- * @author dpie
- */
-
+/**
+ * @author dpie
+ */
 
@@ -1,12 +1,6 @@
 
 package eu.dnetlib.usagestats.export;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.log4j.Logger;
-
 import java.io.*;
 import java.net.URL;
 import java.net.URLConnection;
@@ -14,8 +8,14 @@ import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.Statement;
 import java.text.SimpleDateFormat;
-import java.util.Date;
 import java.util.Calendar;
+import java.util.Date;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Logger;
 
 public class PiwikDownloadLogs {
 
@@ -65,68 +65,76 @@ public class PiwikDownloadLogs {
 
     public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
 
-        Statement statement = ConnectDB.getConnection().createStatement();
+        Statement statement = ConnectDB.getConnection().createStatement();
 
-        ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
-        while (rs.next()) {
-            int siteId = rs.getInt(1);
-            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
+        ResultSet rs = statement
+            .executeQuery(
+                "SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
+        while (rs.next()) {
+            int siteId = rs.getInt(1);
+            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
 
-            Calendar start = Calendar.getInstance();
-            start.set(Calendar.YEAR, 2016);
-            start.set(Calendar.MONTH, Calendar.MARCH);
-            //start.setTime(simpleDateFormat.parse("2016-01"));
+            Calendar start = Calendar.getInstance();
+            start.set(Calendar.YEAR, 2016);
+            start.set(Calendar.MONTH, Calendar.MARCH);
+            // start.setTime(simpleDateFormat.parse("2016-01"));
 
-            Calendar end = Calendar.getInstance();
-            end.add(Calendar.DAY_OF_MONTH, -1);
+            Calendar end = Calendar.getInstance();
+            end.add(Calendar.DAY_OF_MONTH, -1);
 
-            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
-            PreparedStatement st = ConnectDB.DB_CONNECTION.prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
-            st.setInt(1, siteId);
+            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
+            PreparedStatement st = ConnectDB.DB_CONNECTION
+                .prepareStatement(
+                    "SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
+            st.setInt(1, siteId);
 
-            ResultSet rs_date = st.executeQuery();
-            while (rs_date.next()) {
-                if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
-                    start.setTime(sdf.parse(rs_date.getString(1)));
-                }
-            }
-            rs_date.close();
+            ResultSet rs_date = st.executeQuery();
+            while (rs_date.next()) {
+                if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
+                    && !rs_date.getString(1).equals("")) {
+                    start.setTime(sdf.parse(rs_date.getString(1)));
+                }
+            }
+            rs_date.close();
 
-            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
-                log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
+            for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+                log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
 
-                String period = "&period=day&date=" + sdf.format(date);
-                String outFolder = "";
-                //portal siteId = 109;
-                if (siteId == Integer.parseInt(portalMatomoID)) {
-                    outFolder = portalLogPath;
-                } else {
-                    outFolder = repoLogsPath;
-                }
-                FileSystem fs = FileSystem.get(new Configuration());
-                FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
+                String period = "&period=day&date=" + sdf.format(date);
+                String outFolder = "";
+                // portal siteId = 109;
+                if (siteId == Integer.parseInt(portalMatomoID)) {
+                    outFolder = portalLogPath;
+                } else {
+                    outFolder = repoLogsPath;
+                }
+                FileSystem fs = FileSystem.get(new Configuration());
+                FSDataOutputStream fin = fs
+                    .create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
 
-                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
-                String content = "";
+                String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format
+                    + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
+                String content = "";
 
-                int i = 0;
+                int i = 0;
 
-                while (!content.equals("[]\n")) {
-                    String apiUrl = baseApiUrl;
+                while (!content.equals("[]\n")) {
+                    String apiUrl = baseApiUrl;
 
-                    if (i > 0) {
-                        apiUrl += "&filter_offset=" + (i * 1000);
-                    }
+                    if (i > 0) {
+                        apiUrl += "&filter_offset=" + (i * 1000);
+                    }
 
-                    content = getJson(apiUrl);
+                    content = getJson(apiUrl);
 
-                    fin.write(content.getBytes());
+                    fin.write(content.getBytes());
 
-                    i++;
-                }
-                fin.close();
+                    i++;
+                }
+                fin.close();
 
-            }
-        }
+            }
+        }
 
     }
 }
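The bulk of the PiwikDownloadLogs hunk is re-indentation, but the paging loop it reformats is the interesting part: Matomo caps each response at filter_limit rows, so the code advances filter_offset in steps of 1000 until the API returns an empty JSON array. A condensed, self-contained sketch of that loop; the URL is hypothetical and getJson stands in for the class's own HTTP helper:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;

    public class MatomoPagingSketch {

        // Stub for the class's private getJson(...) helper: fetch a URL as text,
        // preserving line breaks so an empty result reads exactly "[]\n".
        static String getJson(String apiUrl) throws Exception {
            StringBuilder sb = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(apiUrl).openStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    sb.append(line).append('\n');
                }
            }
            return sb.toString();
        }

        public static void main(String[] args) throws Exception {
            // Hypothetical base URL; the real one is built from getPiwikLogUrl(),
            // the API method, idSite, period, format and token_auth.
            String baseApiUrl = "https://analytics.example.org/?module=API&method=Live.getLastVisitsDetails"
                + "&idSite=109&period=day&date=2016-03-01&format=json"
                + "&expanded=5&filter_limit=1000&token_auth=XXX";

            String content = "";
            int i = 0;
            while (!content.equals("[]\n")) {
                String apiUrl = baseApiUrl;
                if (i > 0) {
                    apiUrl += "&filter_offset=" + (i * 1000); // next page of 1000 rows
                }
                content = getJson(apiUrl);
                // The real code writes content to an HDFS FSDataOutputStream here.
                i++;
            }
        }
    }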
@@ -7,10 +7,8 @@
 package eu.dnetlib.usagestats.export;
 
-/**
- *
- * @author dpie
- */
-
+/**
+ * @author dpie
+ */
 