Changes to make it compile successfully

This commit is contained in:
Spyros Zoupanos 2020-05-07 21:46:14 +03:00
parent af62b14f91
commit cabe92d155
19 changed files with 137 additions and 78 deletions

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.model;
import java.util.Map;
@@ -20,9 +21,11 @@ public class Event {
private Map<String, Object> map;
public Event() {}
public Event() {
}
public Event(final String producerId, final String eventId, final String topic, final String payload, final Long creationDate, final Long expiryDate,
public Event(final String producerId, final String eventId, final String topic, final String payload,
final Long creationDate, final Long expiryDate,
final boolean instantMessage,
final Map<String, Object> map) {
this.producerId = producerId;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.model;
import java.text.ParseException;
@@ -38,8 +39,8 @@ public class EventFactory {
final String payload = createPayload(target, updateInfo);
final String eventId =
calculateEventId(updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
final String eventId = calculateEventId(
updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
res.setEventId(eventId);
res.setProducerId(PRODUCER_ID);
@@ -61,7 +62,8 @@ public class EventFactory {
return payload.toJSON();
}
private static Map<String, Object> createMapFromResult(final Result oaf, final Result source, final UpdateInfo<?> updateInfo) {
private static Map<String, Object> createMapFromResult(final Result oaf, final Result source,
final UpdateInfo<?> updateInfo) {
final Map<String, Object> map = new HashMap<>();
final List<KeyValue> collectedFrom = oaf.getCollectedfrom();
@@ -87,12 +89,18 @@ public class EventFactory {
final List<StructuredProperty> subjects = oaf.getSubject();
if (subjects.size() > 0) {
map.put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
map
.put(
"target_publication_subject_list",
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
}
final List<Author> authors = oaf.getAuthor();
if (authors.size() > 0) {
map.put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList()));
map
.put(
"target_publication_author_list",
authors.stream().map(Author::getFullname).collect(Collectors.toList()));
}
// PROVENANCE INFO
@@ -119,7 +127,9 @@ public class EventFactory {
}
private static long parseDateTolong(final String date) {
if (StringUtils.isBlank(date)) { return -1; }
if (StringUtils.isBlank(date)) {
return -1;
}
try {
return DateUtils.parseDate(date, DATE_PATTERNS).getTime();
} catch (final ParseException e) {
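Note: calculateEventId itself is not part of this hunk, only its call sites. For orientation, a minimal sketch of what such a helper could look like, assuming a digest-based id; the md5 digest, the "event-" prefix, and the field separator are illustrative assumptions, not taken from this commit:

import org.apache.commons.codec.digest.DigestUtils;

// Hypothetical sketch only: derive a deterministic event id from the same
// three values the calculateEventId call above passes in.
private static String calculateEventId(final String topic, final String targetId, final String value) {
	return "event-" + DigestUtils.md5Hex(topic + "|" + targetId + "|" + value);
}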

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@@ -40,8 +41,10 @@ public class GenerateEventsApplication {
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(GenerateEventsApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
IOUtils
.toString(
GenerateEventsApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
parser.parseArgument(args);
final Boolean isSparkSessionManaged = Optional
@@ -78,9 +81,12 @@ public class GenerateEventsApplication {
for (final Result source : children) {
for (final Result target : children) {
if (source != target) {
list.addAll(findUpdates(source, target).stream()
.map(info -> EventFactory.newBrokerEvent(source, target, info))
.collect(Collectors.toList()));
list
.addAll(
findUpdates(source, target)
.stream()
.map(info -> EventFactory.newBrokerEvent(source, target, info))
.collect(Collectors.toList()));
}
}
}
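findUpdates is only called, not defined, in this hunk. A hedged sketch of the shape implied by the call site (the actual enrichment checks live in the EnrichMissing* classes touched later in this commit; the body here is illustrative):

// Illustrative only: compare two results from the same merge group and
// collect one UpdateInfo per enrichment opportunity found.
private static List<UpdateInfo<?>> findUpdates(final Result source, final Result target) {
	final List<UpdateInfo<?>> list = new ArrayList<>();
	// e.g. add an EnrichMissingAbstract when source has an abstract that
	// target lacks; analogous checks for pid, project, subject, ...
	return list;
}

Since every ordered pair in the group is compared, the nested loop above is O(n²) in the group size; that is usually acceptable because merge groups of duplicate records tend to be small.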

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;
@@ -25,7 +26,8 @@ public class EnrichMissingProject extends UpdateInfo<Project> {
@Override
public String getHighlightValueAsString() {
return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + getHighlightValue().getCode();
return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram()
+ getHighlightValue().getCode();
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util;
import eu.dnetlib.broker.objects.OpenAireEventPayload;

View File

@@ -1,20 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp-workflows</artifactId>
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.1.7-SNAPSHOT</version>
</parent>
<groupId>eu.dnetlib</groupId>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-stats-update</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.4.5</version>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20180130</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>[9.1-901.jdbc4,9.5)</version>
</dependency>
</dependencies>
<build>
@@ -29,4 +50,8 @@
</plugin>
</plugins>
</build>
<properties>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
</project>
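Two things worth double-checking in this new module descriptor: spark-core_2.11 is pinned to 2.2.0 while spark-sql_2.11 is pinned to 2.4.5, and mixing Spark artifact versions on one classpath is a common source of NoSuchMethodError at runtime; aligning both on a single version, e.g. via a shared <spark.version> property (or the dhp-workflows parent's dependency management, if it defines one), would be safer. Likewise, maven.compiler.source/target 1.7 predates the Java 8 baseline that Spark 2.x expects, so 1.8 is the usual setting alongside these dependencies.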

View File

@@ -9,7 +9,6 @@ package eu.dnetlib.usagestats.export;
/*
@author dpie
*/
/*
@author dpie
*/
@@ -23,7 +22,7 @@ import org.apache.log4j.Logger;
public abstract class ConnectDB {
private static Connection DB_CONNECTION;
public static Connection DB_CONNECTION;
private static String dbURL;
private static String dbUsername;
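The visibility change above (DB_CONNECTION going from private to public) is what lets PiwikDownloadLogs call ConnectDB.DB_CONNECTION.prepareStatement(...) directly later in this commit. Since ConnectDB.getConnection() is already used in that same class, the field exposure could likely be avoided by going through the getter at that call site too, e.g. (assuming getConnection() hands back the same live connection):

// Sketch: use the existing getter instead of the now-public field.
PreparedStatement st = ConnectDB
	.getConnection()
	.prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");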

View File

@@ -2,10 +2,8 @@
package eu.dnetlib.usagestats.export;
/**
*
* @author dpie
*/
/**
* @author dpie
*/

View File

@@ -1,12 +1,6 @@
package eu.dnetlib.usagestats.export;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
@@ -14,8 +8,14 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
public class PiwikDownloadLogs {
@@ -65,68 +65,76 @@ public class PiwikDownloadLogs {
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
Statement statement = ConnectDB.getConnection().createStatement();
ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
while (rs.next()) {
int siteId = rs.getInt(1);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Statement statement = ConnectDB.getConnection().createStatement();
Calendar start = Calendar.getInstance();
start.set(Calendar.YEAR, 2016);
start.set(Calendar.MONTH, Calendar.MARCH);
//start.setTime(simpleDateFormat.parse("2016-01"));
ResultSet rs = statement
.executeQuery(
"SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
while (rs.next()) {
int siteId = rs.getInt(1);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Calendar end = Calendar.getInstance();
end.add(Calendar.DAY_OF_MONTH, -1);
Calendar start = Calendar.getInstance();
start.set(Calendar.YEAR, 2016);
start.set(Calendar.MONTH, Calendar.MARCH);
// start.setTime(simpleDateFormat.parse("2016-01"));
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB.DB_CONNECTION.prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
st.setInt(1, siteId);
Calendar end = Calendar.getInstance();
end.add(Calendar.DAY_OF_MONTH, -1);
ResultSet rs_date = st.executeQuery();
while (rs_date.next()) {
if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) {
start.setTime(sdf.parse(rs_date.getString(1)));
}
}
rs_date.close();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB.DB_CONNECTION
.prepareStatement(
"SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
st.setInt(1, siteId);
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
ResultSet rs_date = st.executeQuery();
while (rs_date.next()) {
if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
&& !rs_date.getString(1).equals("")) {
start.setTime(sdf.parse(rs_date.getString(1)));
}
}
rs_date.close();
String period = "&period=day&date=" + sdf.format(date);
String outFolder = "";
//portal siteId = 109;
if (siteId == Integer.parseInt(portalMatomoID)) {
outFolder = portalLogPath;
} else {
outFolder = repoLogsPath;
}
FileSystem fs = FileSystem.get(new Configuration());
FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
String content = "";
String period = "&period=day&date=" + sdf.format(date);
String outFolder = "";
// portal siteId = 109;
if (siteId == Integer.parseInt(portalMatomoID)) {
outFolder = portalLogPath;
} else {
outFolder = repoLogsPath;
}
FileSystem fs = FileSystem.get(new Configuration());
FSDataOutputStream fin = fs
.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
int i = 0;
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format
+ "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
String content = "";
while (!content.equals("[]\n")) {
String apiUrl = baseApiUrl;
int i = 0;
if (i > 0) {
apiUrl += "&filter_offset=" + (i * 1000);
}
while (!content.equals("[]\n")) {
String apiUrl = baseApiUrl;
content = getJson(apiUrl);
if (i > 0) {
apiUrl += "&filter_offset=" + (i * 1000);
}
fin.write(content.getBytes());
content = getJson(apiUrl);
i++;
}
fin.close();
fin.write(content.getBytes());
}
i++;
}
fin.close();
}
}
}
}
}
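Two notes on the method above. First, the inner loop pages through the Matomo reporting API in steps of 1000 records (filter_limit=1000), advancing filter_offset by i * 1000 until the body comes back as an empty JSON array ("[]\n"). Condensed into a hypothetical standalone helper, reusing the class's getJson(String) method and assuming it returns the raw response body:

// Condensed sketch of the paging pattern above; illustrative only.
private void writeAllPages(final String baseApiUrl, final FSDataOutputStream fin) throws Exception {
	String content = "";
	int i = 0;
	while (!content.equals("[]\n")) {
		final String apiUrl = i > 0 ? baseApiUrl + "&filter_offset=" + (i * 1000) : baseApiUrl;
		content = getJson(apiUrl);
		fin.write(content.getBytes());
		i++;
	}
	fin.close();
}

Second, new SimpleDateFormat("YYYY-MM") uses Java's week-based year (uppercase Y); the calendar year is lowercase yyyy. The variable is unused in the hunk shown (only the commented-out parse references it), so it is harmless here, but worth fixing if that line is ever revived.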

View File

@@ -7,10 +7,8 @@
package eu.dnetlib.usagestats.export;
/**
*
* @author dpie
*/
/**
* @author dpie
*/