Changes to make it compile successfully

This commit is contained in:
Spyros Zoupanos 2020-05-07 21:46:14 +03:00
parent af62b14f91
commit cabe92d155
19 changed files with 137 additions and 78 deletions

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.model; package eu.dnetlib.dhp.broker.model;
import java.util.Map; import java.util.Map;
@ -20,9 +21,11 @@ public class Event {
private Map<String, Object> map; private Map<String, Object> map;
public Event() {} public Event() {
}
public Event(final String producerId, final String eventId, final String topic, final String payload, final Long creationDate, final Long expiryDate, public Event(final String producerId, final String eventId, final String topic, final String payload,
final Long creationDate, final Long expiryDate,
final boolean instantMessage, final boolean instantMessage,
final Map<String, Object> map) { final Map<String, Object> map) {
this.producerId = producerId; this.producerId = producerId;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.model; package eu.dnetlib.dhp.broker.model;
import java.text.ParseException; import java.text.ParseException;
@ -38,8 +39,8 @@ public class EventFactory {
final String payload = createPayload(target, updateInfo); final String payload = createPayload(target, updateInfo);
final String eventId = final String eventId = calculateEventId(
calculateEventId(updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
res.setEventId(eventId); res.setEventId(eventId);
res.setProducerId(PRODUCER_ID); res.setProducerId(PRODUCER_ID);
@ -61,7 +62,8 @@ public class EventFactory {
return payload.toJSON(); return payload.toJSON();
} }
private static Map<String, Object> createMapFromResult(final Result oaf, final Result source, final UpdateInfo<?> updateInfo) { private static Map<String, Object> createMapFromResult(final Result oaf, final Result source,
final UpdateInfo<?> updateInfo) {
final Map<String, Object> map = new HashMap<>(); final Map<String, Object> map = new HashMap<>();
final List<KeyValue> collectedFrom = oaf.getCollectedfrom(); final List<KeyValue> collectedFrom = oaf.getCollectedfrom();
@ -87,12 +89,18 @@ public class EventFactory {
final List<StructuredProperty> subjects = oaf.getSubject(); final List<StructuredProperty> subjects = oaf.getSubject();
if (subjects.size() > 0) { if (subjects.size() > 0) {
map.put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); map
.put(
"target_publication_subject_list",
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
} }
final List<Author> authors = oaf.getAuthor(); final List<Author> authors = oaf.getAuthor();
if (authors.size() > 0) { if (authors.size() > 0) {
map.put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList())); map
.put(
"target_publication_author_list",
authors.stream().map(Author::getFullname).collect(Collectors.toList()));
} }
// PROVENANCE INFO // PROVENANCE INFO
@ -119,7 +127,9 @@ public class EventFactory {
} }
private static long parseDateTolong(final String date) { private static long parseDateTolong(final String date) {
if (StringUtils.isBlank(date)) { return -1; } if (StringUtils.isBlank(date)) {
return -1;
}
try { try {
return DateUtils.parseDate(date, DATE_PATTERNS).getTime(); return DateUtils.parseDate(date, DATE_PATTERNS).getTime();
} catch (final ParseException e) { } catch (final ParseException e) {

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa; package eu.dnetlib.dhp.broker.oa;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -40,8 +41,10 @@ public class GenerateEventsApplication {
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(GenerateEventsApplication.class IOUtils
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); .toString(
GenerateEventsApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
parser.parseArgument(args); parser.parseArgument(args);
final Boolean isSparkSessionManaged = Optional final Boolean isSparkSessionManaged = Optional
@ -78,9 +81,12 @@ public class GenerateEventsApplication {
for (final Result source : children) { for (final Result source : children) {
for (final Result target : children) { for (final Result target : children) {
if (source != target) { if (source != target) {
list.addAll(findUpdates(source, target).stream() list
.map(info -> EventFactory.newBrokerEvent(source, target, info)) .addAll(
.collect(Collectors.toList())); findUpdates(source, target)
.stream()
.map(info -> EventFactory.newBrokerEvent(source, target, info))
.collect(Collectors.toList()));
} }
} }
} }

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
@ -25,7 +26,8 @@ public class EnrichMissingProject extends UpdateInfo<Project> {
@Override @Override
public String getHighlightValueAsString() { public String getHighlightValueAsString() {
return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + getHighlightValue().getCode(); return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram()
+ getHighlightValue().getCode();
} }
} }

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.broker.objects.OpenAireEventPayload;

View File

@ -1,20 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId >
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.1.7-SNAPSHOT</version> <version>1.1.7-SNAPSHOT</version>
</parent> </parent>
<groupId>eu.dnetlib</groupId>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-stats-update</artifactId> <artifactId>dhp-usage-stats-update</artifactId>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_2.11</artifactId>
<version>2.2.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_2.11</artifactId>
<version>2.4.5</version>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20180130</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>[9.1-901.jdbc4,9.5)</version>
</dependency> </dependency>
</dependencies> </dependencies>
<build> <build>
@ -29,4 +50,8 @@
</plugin> </plugin>
</plugins> </plugins>
</build> </build>
<properties>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
</project> </project>

View File

@ -9,7 +9,6 @@ package eu.dnetlib.usagestats.export;
/* /*
@author dpie @author dpie
*/ */
/* /*
@author dpie @author dpie
*/ */
@ -23,7 +22,7 @@ import org.apache.log4j.Logger;
public abstract class ConnectDB { public abstract class ConnectDB {
private static Connection DB_CONNECTION; public static Connection DB_CONNECTION;
private static String dbURL; private static String dbURL;
private static String dbUsername; private static String dbUsername;

View File

@ -2,10 +2,8 @@
package eu.dnetlib.usagestats.export; package eu.dnetlib.usagestats.export;
/** /**
*
* @author dpie * @author dpie
*/ */
/** /**
* @author dpie * @author dpie
*/ */

View File

@ -1,12 +1,6 @@
package eu.dnetlib.usagestats.export; package eu.dnetlib.usagestats.export;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;
import java.io.*; import java.io.*;
import java.net.URL; import java.net.URL;
import java.net.URLConnection; import java.net.URLConnection;
@ -14,8 +8,14 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.Statement; import java.sql.Statement;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Calendar; import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
public class PiwikDownloadLogs { public class PiwikDownloadLogs {
@ -65,68 +65,76 @@ public class PiwikDownloadLogs {
public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception { public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception {
Statement statement = ConnectDB.getConnection().createStatement(); Statement statement = ConnectDB.getConnection().createStatement();
ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;"); ResultSet rs = statement
while (rs.next()) { .executeQuery(
int siteId = rs.getInt(1); "SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;");
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); while (rs.next()) {
int siteId = rs.getInt(1);
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM");
Calendar start = Calendar.getInstance(); Calendar start = Calendar.getInstance();
start.set(Calendar.YEAR, 2016); start.set(Calendar.YEAR, 2016);
start.set(Calendar.MONTH, Calendar.MARCH); start.set(Calendar.MONTH, Calendar.MARCH);
//start.setTime(simpleDateFormat.parse("2016-01")); // start.setTime(simpleDateFormat.parse("2016-01"));
Calendar end = Calendar.getInstance(); Calendar end = Calendar.getInstance();
end.add(Calendar.DAY_OF_MONTH, -1); end.add(Calendar.DAY_OF_MONTH, -1);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
PreparedStatement st = ConnectDB.DB_CONNECTION.prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;"); PreparedStatement st = ConnectDB.DB_CONNECTION
st.setInt(1, siteId); .prepareStatement(
"SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;");
st.setInt(1, siteId);
ResultSet rs_date = st.executeQuery(); ResultSet rs_date = st.executeQuery();
while (rs_date.next()) { while (rs_date.next()) {
if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) { if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null")
start.setTime(sdf.parse(rs_date.getString(1))); && !rs_date.getString(1).equals("")) {
} start.setTime(sdf.parse(rs_date.getString(1)));
} }
rs_date.close(); }
rs_date.close();
for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) { for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date)); log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date));
String period = "&period=day&date=" + sdf.format(date); String period = "&period=day&date=" + sdf.format(date);
String outFolder = ""; String outFolder = "";
//portal siteId = 109; // portal siteId = 109;
if (siteId == Integer.parseInt(portalMatomoID)) { if (siteId == Integer.parseInt(portalMatomoID)) {
outFolder = portalLogPath; outFolder = portalLogPath;
} else { } else {
outFolder = repoLogsPath; outFolder = repoLogsPath;
} }
FileSystem fs = FileSystem.get(new Configuration()); FileSystem fs = FileSystem.get(new Configuration());
FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true); FSDataOutputStream fin = fs
.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true);
String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth; String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format
String content = ""; + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth;
String content = "";
int i = 0; int i = 0;
while (!content.equals("[]\n")) { while (!content.equals("[]\n")) {
String apiUrl = baseApiUrl; String apiUrl = baseApiUrl;
if (i > 0) { if (i > 0) {
apiUrl += "&filter_offset=" + (i * 1000); apiUrl += "&filter_offset=" + (i * 1000);
} }
content = getJson(apiUrl); content = getJson(apiUrl);
fin.write(content.getBytes()); fin.write(content.getBytes());
i++; i++;
} }
fin.close(); fin.close();
} }
} }
}
} }

View File

@ -7,10 +7,8 @@
package eu.dnetlib.usagestats.export; package eu.dnetlib.usagestats.export;
/** /**
*
* @author dpie * @author dpie
*/ */
/** /**
* @author dpie * @author dpie
*/ */