diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java index e9df9260c..0512a3813 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/Event.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.model; import java.util.Map; @@ -20,9 +21,11 @@ public class Event { private Map map; - public Event() {} + public Event() { + } - public Event(final String producerId, final String eventId, final String topic, final String payload, final Long creationDate, final Long expiryDate, + public Event(final String producerId, final String eventId, final String topic, final String payload, + final Long creationDate, final Long expiryDate, final boolean instantMessage, final Map map) { this.producerId = producerId; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 6bee65eb0..0694556b2 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.model; import java.text.ParseException; @@ -38,8 +39,8 @@ public class EventFactory { final String payload = createPayload(target, updateInfo); - final String eventId = - calculateEventId(updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); + final String eventId = calculateEventId( + updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); res.setEventId(eventId); res.setProducerId(PRODUCER_ID); @@ -61,7 +62,8 @@ public class EventFactory { return payload.toJSON(); } - private static Map createMapFromResult(final Result oaf, final Result source, final UpdateInfo updateInfo) { + private static Map createMapFromResult(final Result oaf, final Result source, + final UpdateInfo updateInfo) { final Map map = new HashMap<>(); final List collectedFrom = oaf.getCollectedfrom(); @@ -87,12 +89,18 @@ public class EventFactory { final List subjects = oaf.getSubject(); if (subjects.size() > 0) { - map.put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); + map + .put( + "target_publication_subject_list", + subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); } final List authors = oaf.getAuthor(); if (authors.size() > 0) { - map.put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList())); + map + .put( + "target_publication_author_list", + authors.stream().map(Author::getFullname).collect(Collectors.toList())); } // PROVENANCE INFO @@ -119,7 +127,9 @@ public class EventFactory { } private static long parseDateTolong(final String date) { - if (StringUtils.isBlank(date)) { return -1; } + if (StringUtils.isBlank(date)) { + return -1; + } try { return DateUtils.parseDate(date, DATE_PATTERNS).getTime(); } catch (final ParseException e) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 7b0ed0882..54d4ef36a 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -40,8 +41,10 @@ public class GenerateEventsApplication { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils.toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + IOUtils + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -78,9 +81,12 @@ public class GenerateEventsApplication { for (final Result source : children) { for (final Result target : children) { if (source != target) { - list.addAll(findUpdates(source, target).stream() - .map(info -> EventFactory.newBrokerEvent(source, target, info)) - .collect(Collectors.toList())); + list + .addAll( + findUpdates(source, target) + .stream() + .map(info -> EventFactory.newBrokerEvent(source, target, info)) + .collect(Collectors.toList())); } } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java index 5821adf1e..493d1f97c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAbstract.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java index 6a8f36ed8..6899c62a3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingAuthorOrcid.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java index cd05b18a0..9464130f3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingOpenAccess.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java index 1bab54188..293d4993f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPid.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java index 368c2babe..a22c179a2 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingProject.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; @@ -25,7 +26,8 @@ public class EnrichMissingProject extends UpdateInfo { @Override public String getHighlightValueAsString() { - return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + getHighlightValue().getCode(); + return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() + + getHighlightValue().getCode(); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java index abce480e3..869dca264 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingPublicationDate.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java index 6533d8487..a2ed5d043 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMissingSubject.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java index 9b79d5828..4f1e88d3d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreOpenAccess.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java index 37b9cfeb8..ecf2cf310 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMorePid.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java index 84e2bbe5e..f29b86292 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EnrichMoreSubject.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.util.Arrays; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 73f3f2f5a..f7b6b69e9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import eu.dnetlib.broker.objects.OpenAireEventPayload; diff --git a/dhp-workflows/dhp-usage-stats-update/pom.xml b/dhp-workflows/dhp-usage-stats-update/pom.xml index f85872fbd..7c07c8e1f 100644 --- a/dhp-workflows/dhp-usage-stats-update/pom.xml +++ b/dhp-workflows/dhp-usage-stats-update/pom.xml @@ -1,20 +1,41 @@ - - dhp-workflows + + + + dhp-workflows eu.dnetlib.dhp 1.1.7-SNAPSHOT + eu.dnetlib 4.0.0 dhp-usage-stats-update org.apache.spark spark-core_2.11 + 2.2.0 org.apache.spark spark-sql_2.11 + 2.4.5 + + + com.googlecode.json-simple + json-simple + 1.1.1 + + + org.json + json + 20180130 + jar + + + postgresql + postgresql + [9.1-901.jdbc4,9.5) @@ -29,4 +50,8 @@ + + 1.7 + 1.7 + diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ConnectDB.java index d4b9e6786..f0a6caf82 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ConnectDB.java @@ -9,7 +9,6 @@ package eu.dnetlib.usagestats.export; /* @author dpie */ - /* @author dpie */ @@ -23,7 +22,7 @@ import org.apache.log4j.Logger; public abstract class ConnectDB { - private static Connection DB_CONNECTION; + public static Connection DB_CONNECTION; private static String dbURL; private static String dbUsername; diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/IrusStats.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/IrusStats.java index 8062ce428..30d73f4e7 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/IrusStats.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/IrusStats.java @@ -2,10 +2,8 @@ package eu.dnetlib.usagestats.export; /** - * * @author dpie */ - /** * @author dpie */ diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikDownloadLogs.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikDownloadLogs.java index ab6645c3e..92056d55f 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikDownloadLogs.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/PiwikDownloadLogs.java @@ -1,12 +1,6 @@ package eu.dnetlib.usagestats.export; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.FileSystem; -import org.apache.log4j.Logger; - import java.io.*; import java.net.URL; import java.net.URLConnection; @@ -14,8 +8,14 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.Statement; import java.text.SimpleDateFormat; -import java.util.Date; import java.util.Calendar; +import java.util.Date; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.Logger; public class PiwikDownloadLogs { @@ -65,68 +65,76 @@ public class PiwikDownloadLogs { public void GetOpenAIRELogs(String repoLogsPath, String portalLogPath, String portalMatomoID) throws Exception { - Statement statement = ConnectDB.getConnection().createStatement(); - - ResultSet rs = statement.executeQuery("SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;"); - while (rs.next()) { - int siteId = rs.getInt(1); - SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); + Statement statement = ConnectDB.getConnection().createStatement(); - Calendar start = Calendar.getInstance(); - start.set(Calendar.YEAR, 2016); - start.set(Calendar.MONTH, Calendar.MARCH); - //start.setTime(simpleDateFormat.parse("2016-01")); + ResultSet rs = statement + .executeQuery( + "SELECT distinct piwik_id from public.datasource where piwik_id is not null order by piwik_id;"); + while (rs.next()) { + int siteId = rs.getInt(1); + SimpleDateFormat simpleDateFormat = new SimpleDateFormat("YYYY-MM"); - Calendar end = Calendar.getInstance(); - end.add(Calendar.DAY_OF_MONTH, -1); + Calendar start = Calendar.getInstance(); + start.set(Calendar.YEAR, 2016); + start.set(Calendar.MONTH, Calendar.MARCH); + // start.setTime(simpleDateFormat.parse("2016-01")); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); - PreparedStatement st = ConnectDB.DB_CONNECTION.prepareStatement("SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;"); - st.setInt(1, siteId); + Calendar end = Calendar.getInstance(); + end.add(Calendar.DAY_OF_MONTH, -1); - ResultSet rs_date = st.executeQuery(); - while (rs_date.next()) { - if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") && !rs_date.getString(1).equals("")) { - start.setTime(sdf.parse(rs_date.getString(1))); - } - } - rs_date.close(); + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); + PreparedStatement st = ConnectDB.DB_CONNECTION + .prepareStatement( + "SELECT max(timestamp) FROM piwiklog WHERE source=? HAVING max(timestamp) is not null;"); + st.setInt(1, siteId); - for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) { - log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date)); + ResultSet rs_date = st.executeQuery(); + while (rs_date.next()) { + if (rs_date.getString(1) != null && !rs_date.getString(1).equals("null") + && !rs_date.getString(1).equals("")) { + start.setTime(sdf.parse(rs_date.getString(1))); + } + } + rs_date.close(); - String period = "&period=day&date=" + sdf.format(date); - String outFolder = ""; - //portal siteId = 109; - if (siteId == Integer.parseInt(portalMatomoID)) { - outFolder = portalLogPath; - } else { - outFolder = repoLogsPath; - } - FileSystem fs = FileSystem.get(new Configuration()); - FSDataOutputStream fin = fs.create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true); + for (Date date = start.getTime(); start.before(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + log.info("Downloading logs for repoid " + siteId + " and for " + sdf.format(date)); - String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth; - String content = ""; + String period = "&period=day&date=" + sdf.format(date); + String outFolder = ""; + // portal siteId = 109; + if (siteId == Integer.parseInt(portalMatomoID)) { + outFolder = portalLogPath; + } else { + outFolder = repoLogsPath; + } + FileSystem fs = FileSystem.get(new Configuration()); + FSDataOutputStream fin = fs + .create(new Path(outFolder + "/" + siteId + "_Piwiklog" + sdf.format((date)) + ".json"), true); - int i = 0; + String baseApiUrl = getPiwikLogUrl() + APImethod + "&idSite=" + siteId + period + format + + "&expanded=5&filter_limit=1000&token_auth=" + tokenAuth; + String content = ""; - while (!content.equals("[]\n")) { - String apiUrl = baseApiUrl; + int i = 0; - if (i > 0) { - apiUrl += "&filter_offset=" + (i * 1000); - } + while (!content.equals("[]\n")) { + String apiUrl = baseApiUrl; - content = getJson(apiUrl); + if (i > 0) { + apiUrl += "&filter_offset=" + (i * 1000); + } - fin.write(content.getBytes()); + content = getJson(apiUrl); - i++; - } - fin.close(); + fin.write(content.getBytes()); - } + i++; + } + fin.close(); - } + } + + } + } } diff --git a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ReadCounterRobotsList.java b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ReadCounterRobotsList.java index e840e6e6c..8ce16d29d 100644 --- a/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ReadCounterRobotsList.java +++ b/dhp-workflows/dhp-usage-stats-update/src/main/java/eu/dnetlib/oa/graph/usage-stats/export/ReadCounterRobotsList.java @@ -7,10 +7,8 @@ package eu.dnetlib.usagestats.export; /** - * * @author dpie */ - /** * @author dpie */