forked from D-Net/dnet-hadoop
fixed conflicts
This commit is contained in:
commit
de6c4d46d8
|
@ -19,7 +19,7 @@
|
||||||
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
|
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
|
||||||
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
|
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
|
||||||
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
|
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
|
||||||
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
|
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="false"/>
|
||||||
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
|
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
|
||||||
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
|
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
|
||||||
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>
|
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>
|
||||||
|
|
|
@ -24,8 +24,10 @@ public class Instance implements Serializable {
|
||||||
|
|
||||||
private String type;
|
private String type;
|
||||||
|
|
||||||
|
|
||||||
private List<String> url;
|
private List<String> url;
|
||||||
|
|
||||||
|
|
||||||
private String publicationdate;// dateofacceptance;
|
private String publicationdate;// dateofacceptance;
|
||||||
|
|
||||||
private String refereed; // peer-review status
|
private String refereed; // peer-review status
|
||||||
|
|
|
@ -31,6 +31,10 @@
|
||||||
<artifactId>elasticsearch-hadoop</artifactId>
|
<artifactId>elasticsearch-hadoop</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class ConditionParams implements Serializable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = 2719901844537516110L;
|
||||||
|
|
||||||
|
private String value;
|
||||||
|
private String otherValue;
|
||||||
|
|
||||||
|
public String getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setValue(final String value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOtherValue() {
|
||||||
|
return otherValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOtherValue(final String otherValue) {
|
||||||
|
this.otherValue = otherValue;
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,7 +2,6 @@
|
||||||
package eu.dnetlib.dhp.broker.model;
|
package eu.dnetlib.dhp.broker.model;
|
||||||
|
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.util.Date;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@ -19,16 +18,12 @@ public class EventFactory {
|
||||||
|
|
||||||
private final static String PRODUCER_ID = "OpenAIRE";
|
private final static String PRODUCER_ID = "OpenAIRE";
|
||||||
|
|
||||||
private static final int TTH_DAYS = 365;
|
|
||||||
|
|
||||||
private final static String[] DATE_PATTERNS = {
|
private final static String[] DATE_PATTERNS = {
|
||||||
"yyyy-MM-dd"
|
"yyyy-MM-dd"
|
||||||
};
|
};
|
||||||
|
|
||||||
public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {
|
public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {
|
||||||
|
|
||||||
final long now = new Date().getTime();
|
|
||||||
|
|
||||||
final Event res = new Event();
|
final Event res = new Event();
|
||||||
|
|
||||||
final MappedFields map = createMapFromResult(updateInfo);
|
final MappedFields map = createMapFromResult(updateInfo);
|
||||||
|
@ -44,8 +39,8 @@ public class EventFactory {
|
||||||
res.setPayload(updateInfo.asBrokerPayload().toJSON());
|
res.setPayload(updateInfo.asBrokerPayload().toJSON());
|
||||||
res.setMap(map);
|
res.setMap(map);
|
||||||
res.setTopic(updateInfo.getTopicPath());
|
res.setTopic(updateInfo.getTopicPath());
|
||||||
res.setCreationDate(now);
|
res.setCreationDate(0l);
|
||||||
res.setExpiryDate(calculateExpiryDate(now));
|
res.setExpiryDate(Long.MAX_VALUE);
|
||||||
res.setInstantMessage(false);
|
res.setInstantMessage(false);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
@ -96,7 +91,9 @@ public class EventFactory {
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String calculateEventId(final String topic, final String dsId, final String publicationId,
|
private static String calculateEventId(final String topic,
|
||||||
|
final String dsId,
|
||||||
|
final String publicationId,
|
||||||
final String value) {
|
final String value) {
|
||||||
return "event-"
|
return "event-"
|
||||||
+ DigestUtils.md5Hex(topic).substring(0, 4) + "-"
|
+ DigestUtils.md5Hex(topic).substring(0, 4) + "-"
|
||||||
|
@ -105,10 +102,6 @@ public class EventFactory {
|
||||||
+ DigestUtils.md5Hex(value).substring(0, 5);
|
+ DigestUtils.md5Hex(value).substring(0, 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static long calculateExpiryDate(final long now) {
|
|
||||||
return now + TTH_DAYS * 24 * 60 * 60 * 1000;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static long parseDateTolong(final String date) {
|
private static long parseDateTolong(final String date) {
|
||||||
if (StringUtils.isBlank(date)) {
|
if (StringUtils.isBlank(date)) {
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
public class MapCondition implements Serializable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = -7137490975452466813L;
|
||||||
|
|
||||||
|
private String field;
|
||||||
|
private List<ConditionParams> listParams = new ArrayList<>();
|
||||||
|
|
||||||
|
public String getField() {
|
||||||
|
return field;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setField(final String field) {
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ConditionParams> getListParams() {
|
||||||
|
return listParams;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setListParams(final List<ConditionParams> listParams) {
|
||||||
|
this.listParams = listParams;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,93 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class Notification implements Serializable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = -1770420972526995727L;
|
||||||
|
|
||||||
|
private String notificationId;
|
||||||
|
|
||||||
|
private String subscriptionId;
|
||||||
|
|
||||||
|
private String producerId;
|
||||||
|
|
||||||
|
private String eventId;
|
||||||
|
|
||||||
|
private String topic;
|
||||||
|
|
||||||
|
private Long date;
|
||||||
|
|
||||||
|
private String payload;
|
||||||
|
|
||||||
|
private MappedFields map;
|
||||||
|
|
||||||
|
public String getNotificationId() {
|
||||||
|
return notificationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNotificationId(final String notificationId) {
|
||||||
|
this.notificationId = notificationId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSubscriptionId() {
|
||||||
|
return subscriptionId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubscriptionId(final String subscriptionId) {
|
||||||
|
this.subscriptionId = subscriptionId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProducerId() {
|
||||||
|
return producerId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProducerId(final String producerId) {
|
||||||
|
this.producerId = producerId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEventId() {
|
||||||
|
return eventId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEventId(final String eventId) {
|
||||||
|
this.eventId = eventId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTopic() {
|
||||||
|
return topic;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTopic(final String topic) {
|
||||||
|
this.topic = topic;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPayload() {
|
||||||
|
return payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPayload(final String payload) {
|
||||||
|
this.payload = payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MappedFields getMap() {
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMap(final MappedFields map) {
|
||||||
|
this.map = map;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long getDate() {
|
||||||
|
return date;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDate(final Long date) {
|
||||||
|
this.date = date;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,74 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
public class Subscription implements Serializable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = 1051702214740830010L;
|
||||||
|
|
||||||
|
private String subscriptionId;
|
||||||
|
|
||||||
|
private String subscriber;
|
||||||
|
|
||||||
|
private String topic;
|
||||||
|
|
||||||
|
private String conditions;
|
||||||
|
|
||||||
|
public String getSubscriptionId() {
|
||||||
|
return subscriptionId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubscriptionId(final String subscriptionId) {
|
||||||
|
this.subscriptionId = subscriptionId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSubscriber() {
|
||||||
|
return subscriber;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubscriber(final String subscriber) {
|
||||||
|
this.subscriber = subscriber;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTopic() {
|
||||||
|
return topic;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTopic(final String topic) {
|
||||||
|
this.topic = topic;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getConditions() {
|
||||||
|
return conditions;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConditions(final String conditions) {
|
||||||
|
this.conditions = conditions;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, List<ConditionParams>> conditionsAsMap() {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
try {
|
||||||
|
final List<MapCondition> list = mapper
|
||||||
|
.readValue(
|
||||||
|
getConditions(), mapper.getTypeFactory().constructCollectionType(List.class, MapCondition.class));
|
||||||
|
return list
|
||||||
|
.stream()
|
||||||
|
.filter(mc -> !mc.getListParams().isEmpty())
|
||||||
|
.collect(Collectors.toMap(MapCondition::getField, MapCondition::getListParams));
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -3,11 +3,16 @@ package eu.dnetlib.dhp.broker.oa;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.sql.Dataset;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.TypedColumn;
|
import org.apache.spark.sql.TypedColumn;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -28,8 +33,8 @@ public class GenerateStatsJob {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
IndexOnESJob.class
|
GenerateStatsJob.class
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/stats_params.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final Boolean isSparkSessionManaged = Optional
|
final Boolean isSparkSessionManaged = Optional
|
||||||
|
@ -43,21 +48,50 @@ public class GenerateStatsJob {
|
||||||
final String eventsPath = parser.get("workingPath") + "/events";
|
final String eventsPath = parser.get("workingPath") + "/events";
|
||||||
log.info("eventsPath: {}", eventsPath);
|
log.info("eventsPath: {}", eventsPath);
|
||||||
|
|
||||||
final String statsPath = parser.get("workingPath") + "/stats";
|
final String dbUrl = parser.get("dbUrl");
|
||||||
log.info("stats: {}", statsPath);
|
log.info("dbUrl: {}", dbUrl);
|
||||||
|
|
||||||
|
final String dbUser = parser.get("dbUser");
|
||||||
|
log.info("dbUser: {}", dbUser);
|
||||||
|
|
||||||
|
final String dbPassword = parser.get("dbPassword");
|
||||||
|
log.info("dbPassword: {}", "***");
|
||||||
|
|
||||||
|
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
|
||||||
|
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
|
||||||
|
|
||||||
final TypedColumn<Event, DatasourceStats> aggr = new StatsAggregator().toColumn();
|
final TypedColumn<Event, DatasourceStats> aggr = new StatsAggregator().toColumn();
|
||||||
|
|
||||||
|
final Properties connectionProperties = new Properties();
|
||||||
|
connectionProperties.put("user", dbUser);
|
||||||
|
connectionProperties.put("password", dbPassword);
|
||||||
|
|
||||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||||
|
|
||||||
final Dataset<DatasourceStats> stats = ClusterUtils
|
ClusterUtils
|
||||||
.readPath(spark, eventsPath, Event.class)
|
.readPath(spark, eventsPath, Event.class)
|
||||||
.groupByKey(e -> e.getMap().getTargetDatasourceId(), Encoders.STRING())
|
.groupByKey(e -> e.getTopic() + "@@@" + e.getMap().getTargetDatasourceId(), Encoders.STRING())
|
||||||
.agg(aggr)
|
.agg(aggr)
|
||||||
.map(t -> t._2, Encoders.bean(DatasourceStats.class));
|
.map(t -> t._2, Encoders.bean(DatasourceStats.class))
|
||||||
|
.write()
|
||||||
|
.jdbc(dbUrl, "oa_datasource_stats_temp", connectionProperties);
|
||||||
|
|
||||||
|
log.info("*** updateStats");
|
||||||
|
updateStats(brokerApiBaseUrl);
|
||||||
|
log.info("*** ALL done.");
|
||||||
|
|
||||||
ClusterUtils.save(stats, statsPath, DatasourceStats.class, null);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String updateStats(final String brokerApiBaseUrl) throws IOException {
|
||||||
|
final String url = brokerApiBaseUrl + "/api/openaireBroker/stats/update";
|
||||||
|
final HttpGet req = new HttpGet(url);
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
return IOUtils.toString(response.getEntity().getContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,126 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa;
|
||||||
|
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpDelete;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.apache.spark.sql.TypedColumn;
|
||||||
|
import org.apache.spark.util.LongAccumulator;
|
||||||
|
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Event;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.aggregators.subset.EventSubsetAggregator;
|
||||||
|
|
||||||
|
public class IndexEventSubsetJob {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(IndexEventSubsetJob.class);
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
IndexEventSubsetJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_event_subset.json")));
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
final String eventsPath = parser.get("workingPath") + "/events";
|
||||||
|
log.info("eventsPath: {}", eventsPath);
|
||||||
|
|
||||||
|
final String index = parser.get("index");
|
||||||
|
log.info("index: {}", index);
|
||||||
|
|
||||||
|
final String indexHost = parser.get("esHost");
|
||||||
|
log.info("indexHost: {}", indexHost);
|
||||||
|
|
||||||
|
final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
|
||||||
|
log.info("maxEventsForTopic: {}", maxEventsForTopic);
|
||||||
|
|
||||||
|
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
|
||||||
|
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
|
||||||
|
|
||||||
|
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
||||||
|
|
||||||
|
final TypedColumn<Event, EventGroup> aggr = new EventSubsetAggregator(maxEventsForTopic).toColumn();
|
||||||
|
|
||||||
|
final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed");
|
||||||
|
|
||||||
|
final long now = new Date().getTime();
|
||||||
|
|
||||||
|
final Dataset<Event> subset = ClusterUtils
|
||||||
|
.readPath(spark, eventsPath, Event.class)
|
||||||
|
.groupByKey(e -> e.getTopic() + '@' + e.getMap().getTargetDatasourceId(), Encoders.STRING())
|
||||||
|
.agg(aggr)
|
||||||
|
.map(t -> t._2, Encoders.bean(EventGroup.class))
|
||||||
|
.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
|
||||||
|
|
||||||
|
final JavaRDD<String> inputRdd = subset
|
||||||
|
.map(e -> prepareEventForIndexing(e, now, total), Encoders.STRING())
|
||||||
|
.javaRDD();
|
||||||
|
|
||||||
|
final Map<String, String> esCfg = new HashMap<>();
|
||||||
|
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
|
||||||
|
|
||||||
|
esCfg.put("es.index.auto.create", "false");
|
||||||
|
esCfg.put("es.nodes", indexHost);
|
||||||
|
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
|
||||||
|
esCfg.put("es.batch.write.retry.count", "8");
|
||||||
|
esCfg.put("es.batch.write.retry.wait", "60s");
|
||||||
|
esCfg.put("es.batch.size.entries", "200");
|
||||||
|
esCfg.put("es.nodes.wan.only", "true");
|
||||||
|
|
||||||
|
log.info("*** Start indexing");
|
||||||
|
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||||
|
log.info("*** End indexing");
|
||||||
|
|
||||||
|
log.info("*** Deleting old events");
|
||||||
|
final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000);
|
||||||
|
log.info("*** Deleted events: " + message);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception {
|
||||||
|
final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l;
|
||||||
|
final HttpDelete req = new HttpDelete(url);
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
return IOUtils.toString(response.getEntity().getContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String prepareEventForIndexing(final Event e, final long creationDate, final LongAccumulator acc)
|
||||||
|
throws JsonProcessingException {
|
||||||
|
acc.add(1);
|
||||||
|
|
||||||
|
e.setCreationDate(creationDate);
|
||||||
|
e.setExpiryDate(Long.MAX_VALUE);
|
||||||
|
|
||||||
|
return new ObjectMapper().writeValueAsString(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,238 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpDelete;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.apache.spark.util.LongAccumulator;
|
||||||
|
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.broker.model.ConditionParams;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Event;
|
||||||
|
import eu.dnetlib.dhp.broker.model.MappedFields;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Notification;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Subscription;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.NotificationGroup;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils;
|
||||||
|
|
||||||
|
public class IndexNotificationsJob {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(IndexNotificationsJob.class);
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
IndexNotificationsJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json")));
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
final String eventsPath = parser.get("workingPath") + "/events";
|
||||||
|
log.info("eventsPath: {}", eventsPath);
|
||||||
|
|
||||||
|
final String index = parser.get("index");
|
||||||
|
log.info("index: {}", index);
|
||||||
|
|
||||||
|
final String indexHost = parser.get("esHost");
|
||||||
|
log.info("indexHost: {}", indexHost);
|
||||||
|
|
||||||
|
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
|
||||||
|
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
|
||||||
|
|
||||||
|
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
||||||
|
|
||||||
|
final LongAccumulator total = spark.sparkContext().longAccumulator("total_indexed");
|
||||||
|
|
||||||
|
final long startTime = new Date().getTime();
|
||||||
|
|
||||||
|
final List<Subscription> subscriptions = listSubscriptions(brokerApiBaseUrl);
|
||||||
|
|
||||||
|
log.info("Number of subscriptions: " + subscriptions.size());
|
||||||
|
|
||||||
|
if (subscriptions.size() > 0) {
|
||||||
|
final Dataset<Notification> notifications = ClusterUtils
|
||||||
|
.readPath(spark, eventsPath, Event.class)
|
||||||
|
.map(e -> generateNotifications(e, subscriptions, startTime), Encoders.bean(NotificationGroup.class))
|
||||||
|
.flatMap(g -> g.getData().iterator(), Encoders.bean(Notification.class));
|
||||||
|
|
||||||
|
final JavaRDD<String> inputRdd = notifications
|
||||||
|
.map(n -> prepareForIndexing(n, total), Encoders.STRING())
|
||||||
|
.javaRDD();
|
||||||
|
|
||||||
|
final Map<String, String> esCfg = new HashMap<>();
|
||||||
|
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
|
||||||
|
|
||||||
|
esCfg.put("es.index.auto.create", "false");
|
||||||
|
esCfg.put("es.nodes", indexHost);
|
||||||
|
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
|
||||||
|
esCfg.put("es.batch.write.retry.count", "8");
|
||||||
|
esCfg.put("es.batch.write.retry.wait", "60s");
|
||||||
|
esCfg.put("es.batch.size.entries", "200");
|
||||||
|
esCfg.put("es.nodes.wan.only", "true");
|
||||||
|
|
||||||
|
log.info("*** Start indexing");
|
||||||
|
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||||
|
log.info("*** End indexing");
|
||||||
|
|
||||||
|
log.info("*** Deleting old notifications");
|
||||||
|
final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000);
|
||||||
|
log.info("*** Deleted notifications: " + message);
|
||||||
|
|
||||||
|
log.info("*** sendNotifications (emails, ...)");
|
||||||
|
sendNotifications(brokerApiBaseUrl, startTime - 1000);
|
||||||
|
log.info("*** ALL done.");
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static NotificationGroup generateNotifications(final Event e,
|
||||||
|
final List<Subscription> subscriptions,
|
||||||
|
final long date) {
|
||||||
|
final List<Notification> list = subscriptions
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
|
||||||
|
.filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap()))
|
||||||
|
.map(s -> generateNotification(s, e, date))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
return new NotificationGroup(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Notification generateNotification(final Subscription s, final Event e, final long date) {
|
||||||
|
final Notification n = new Notification();
|
||||||
|
n.setNotificationId("ntf-" + DigestUtils.md5Hex(s.getSubscriptionId() + "@@@" + e.getEventId()));
|
||||||
|
n.setSubscriptionId(s.getSubscriptionId());
|
||||||
|
n.setEventId(e.getEventId());
|
||||||
|
n.setProducerId(e.getProducerId());
|
||||||
|
n.setTopic(e.getTopic());
|
||||||
|
n.setPayload(e.getPayload());
|
||||||
|
n.setMap(e.getMap());
|
||||||
|
n.setDate(date);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean verifyConditions(final MappedFields map,
|
||||||
|
final Map<String, List<ConditionParams>> conditions) {
|
||||||
|
if (conditions.containsKey("targetDatasourceName")
|
||||||
|
&& !SubscriptionUtils
|
||||||
|
.verifyExact(map.getTargetDatasourceName(), conditions.get("targetDatasourceName").get(0).getValue())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (conditions.containsKey("trust")
|
||||||
|
&& !SubscriptionUtils
|
||||||
|
.verifyFloatRange(
|
||||||
|
map.getTrust(), conditions.get("trust").get(0).getValue(),
|
||||||
|
conditions.get("trust").get(0).getOtherValue())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (conditions.containsKey("targetDateofacceptance") && !conditions
|
||||||
|
.get("targetDateofacceptance")
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
c -> SubscriptionUtils
|
||||||
|
.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (conditions.containsKey("targetResultTitle")
|
||||||
|
&& !conditions
|
||||||
|
.get("targetResultTitle")
|
||||||
|
.stream()
|
||||||
|
.anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (conditions.containsKey("targetAuthors")
|
||||||
|
&& !conditions
|
||||||
|
.get("targetAuthors")
|
||||||
|
.stream()
|
||||||
|
.allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (conditions.containsKey("targetSubjects")
|
||||||
|
&& !conditions
|
||||||
|
.get("targetSubjects")
|
||||||
|
.stream()
|
||||||
|
.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Subscription> listSubscriptions(final String brokerApiBaseUrl) throws Exception {
|
||||||
|
final String url = brokerApiBaseUrl + "/api/subscriptions";
|
||||||
|
final HttpGet req = new HttpGet(url);
|
||||||
|
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
final String s = IOUtils.toString(response.getEntity().getContent());
|
||||||
|
return mapper
|
||||||
|
.readValue(s, mapper.getTypeFactory().constructCollectionType(List.class, Subscription.class));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception {
|
||||||
|
final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l;
|
||||||
|
final HttpDelete req = new HttpDelete(url);
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
return IOUtils.toString(response.getEntity().getContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String sendNotifications(final String brokerApiBaseUrl, final long l) throws IOException {
|
||||||
|
final String url = brokerApiBaseUrl + "/api/openaireBroker/notifications/send/" + l;
|
||||||
|
final HttpGet req = new HttpGet(url);
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
return IOUtils.toString(response.getEntity().getContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String prepareForIndexing(final Notification n, final LongAccumulator acc)
|
||||||
|
throws JsonProcessingException {
|
||||||
|
acc.add(1);
|
||||||
|
return new ObjectMapper().writeValueAsString(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -20,6 +20,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.broker.model.Event;
|
import eu.dnetlib.dhp.broker.model.Event;
|
||||||
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public class IndexOnESJob {
|
public class IndexOnESJob {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(IndexOnESJob.class);
|
private static final Logger log = LoggerFactory.getLogger(IndexOnESJob.class);
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
import eu.dnetlib.broker.api.ShortEventMessage;
|
||||||
|
import eu.dnetlib.broker.objects.OaBrokerEventPayload;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Event;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
public class PartitionEventsByDsIdJob {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PartitionEventsByDsIdJob.class);
|
||||||
|
private static final String OPENDOAR_NSPREFIX = "opendoar____::";
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
PartitionEventsByDsIdJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
final SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
final String eventsPath = parser.get("workingPath") + "/events";
|
||||||
|
log.info("eventsPath: {}", eventsPath);
|
||||||
|
|
||||||
|
final String partitionPath = parser.get("workingPath") + "/eventsByOpendoarId";
|
||||||
|
log.info("partitionPath: {}", partitionPath);
|
||||||
|
|
||||||
|
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||||
|
|
||||||
|
ClusterUtils
|
||||||
|
.readPath(spark, eventsPath, Event.class)
|
||||||
|
.filter(e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
|
||||||
|
.filter(e -> e.getMap().getTargetDatasourceId().contains(OPENDOAR_NSPREFIX))
|
||||||
|
.map(
|
||||||
|
e -> new Tuple2<>(
|
||||||
|
StringUtils.substringAfter(e.getMap().getTargetDatasourceId(), OPENDOAR_NSPREFIX),
|
||||||
|
messageFromNotification(e)),
|
||||||
|
Encoders.tuple(Encoders.STRING(), Encoders.bean(ShortEventMessage.class)))
|
||||||
|
.write()
|
||||||
|
.partitionBy("_1")
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.json(partitionPath);
|
||||||
|
|
||||||
|
});
|
||||||
|
renameSubDirs(partitionPath);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void renameSubDirs(final String path) throws IOException {
|
||||||
|
final String prefix = "_1=";
|
||||||
|
final FileSystem fs = FileSystem.get(new Configuration());
|
||||||
|
|
||||||
|
log.info("** Renaming subdirs of " + path);
|
||||||
|
for (final FileStatus fileStatus : fs.listStatus(new Path(path))) {
|
||||||
|
if (fileStatus.isDirectory()) {
|
||||||
|
final Path oldPath = fileStatus.getPath();
|
||||||
|
final String oldName = oldPath.getName();
|
||||||
|
if (oldName.startsWith(prefix)) {
|
||||||
|
final Path newPath = new Path(path + "/" + StringUtils.substringAfter(oldName, prefix));
|
||||||
|
log.info(" * " + oldPath.getName() + " -> " + newPath.getName());
|
||||||
|
fs.rename(oldPath, newPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ShortEventMessage messageFromNotification(final Event e) {
|
||||||
|
final Gson gson = new Gson();
|
||||||
|
|
||||||
|
final OaBrokerEventPayload payload = gson.fromJson(e.getPayload(), OaBrokerEventPayload.class);
|
||||||
|
|
||||||
|
final ShortEventMessage res = new ShortEventMessage();
|
||||||
|
|
||||||
|
res.setOriginalId(payload.getResult().getOriginalId());
|
||||||
|
res.setTitle(payload.getResult().getTitles().stream().filter(StringUtils::isNotBlank).findFirst().orElse(null));
|
||||||
|
res.setTopic(e.getTopic());
|
||||||
|
res.setTrust(payload.getTrust());
|
||||||
|
res.generateMessageFromObject(payload.getHighlight());
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.broker.model.Notification;
|
||||||
|
|
||||||
|
public class NotificationGroup implements Serializable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = 720996471281158977L;
|
||||||
|
|
||||||
|
private List<Notification> data = new ArrayList<>();
|
||||||
|
|
||||||
|
public NotificationGroup() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public NotificationGroup(final List<Notification> data) {
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Notification> getData() {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setData(final List<Notification> data) {
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public NotificationGroup addElement(final Notification elem) {
|
||||||
|
data.add(elem);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public NotificationGroup addGroup(final NotificationGroup group) {
|
||||||
|
data.addAll(group.getData());
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
|
import org.apache.commons.lang3.time.DateUtils;
|
||||||
|
|
||||||
|
public class SubscriptionUtils {
|
||||||
|
|
||||||
|
private static final long ONE_DAY = 86_400_000;
|
||||||
|
|
||||||
|
public static boolean verifyListSimilar(final List<String> list, final String value) {
|
||||||
|
return list.stream().anyMatch(s -> verifySimilar(s, value));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean verifyListExact(final List<String> list, final String value) {
|
||||||
|
return list.stream().anyMatch(s -> verifyExact(s, value));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean verifySimilar(final String s1, final String s2) {
|
||||||
|
for (final String part : s2.split("\\W+")) {
|
||||||
|
if (!StringUtils.containsIgnoreCase(s1, part)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean verifyFloatRange(final float trust, final String min, final String max) {
|
||||||
|
return trust >= NumberUtils.toFloat(min, 0) && trust <= NumberUtils.toFloat(max, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean verifyDateRange(final long date, final String min, final String max) {
|
||||||
|
try {
|
||||||
|
return date >= DateUtils.parseDate(min, "yyyy-MM-dd").getTime()
|
||||||
|
&& date < DateUtils.parseDate(max, "yyyy-MM-dd").getTime() + ONE_DAY;
|
||||||
|
} catch (final ParseException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean verifyExact(final String s1, final String s2) {
|
||||||
|
return StringUtils.equalsIgnoreCase(s1, s2);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -2,8 +2,6 @@
|
||||||
package eu.dnetlib.dhp.broker.oa.util.aggregators.stats;
|
package eu.dnetlib.dhp.broker.oa.util.aggregators.stats;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
public class DatasourceStats implements Serializable {
|
public class DatasourceStats implements Serializable {
|
||||||
|
|
||||||
|
@ -15,7 +13,8 @@ public class DatasourceStats implements Serializable {
|
||||||
private String id;
|
private String id;
|
||||||
private String name;
|
private String name;
|
||||||
private String type;
|
private String type;
|
||||||
private Map<String, Long> topics = new HashMap<>();
|
private String topic;
|
||||||
|
private long size = 0l;
|
||||||
|
|
||||||
public String getId() {
|
public String getId() {
|
||||||
return id;
|
return id;
|
||||||
|
@ -41,21 +40,24 @@ public class DatasourceStats implements Serializable {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Long> getTopics() {
|
public String getTopic() {
|
||||||
return topics;
|
return topic;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTopics(final Map<String, Long> topics) {
|
public void setTopic(final String topic) {
|
||||||
this.topics = topics;
|
this.topic = topic;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void incrementTopic(final String topic, final long inc) {
|
public long getSize() {
|
||||||
if (topics.containsKey(topic)) {
|
return size;
|
||||||
topics.put(topic, topics.get(topic) + inc);
|
|
||||||
} else {
|
|
||||||
topics.put(topic, inc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setSize(final long size) {
|
||||||
|
this.size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementSize(final long inc) {
|
||||||
|
this.size = this.size + inc;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,8 @@ public class StatsAggregator extends Aggregator<Event, DatasourceStats, Datasour
|
||||||
stats.setId(e.getMap().getTargetDatasourceId());
|
stats.setId(e.getMap().getTargetDatasourceId());
|
||||||
stats.setName(e.getMap().getTargetDatasourceName());
|
stats.setName(e.getMap().getTargetDatasourceName());
|
||||||
stats.setType(e.getMap().getTargetDatasourceType());
|
stats.setType(e.getMap().getTargetDatasourceType());
|
||||||
stats.incrementTopic(e.getTopic(), 1l);
|
stats.setTopic(e.getTopic());
|
||||||
|
stats.incrementSize(1l);
|
||||||
return stats;
|
return stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,8 +36,9 @@ public class StatsAggregator extends Aggregator<Event, DatasourceStats, Datasour
|
||||||
stats0.setId(stats1.getId());
|
stats0.setId(stats1.getId());
|
||||||
stats0.setName(stats1.getName());
|
stats0.setName(stats1.getName());
|
||||||
stats0.setType(stats1.getType());
|
stats0.setType(stats1.getType());
|
||||||
|
stats0.setTopic(stats1.getTopic());
|
||||||
}
|
}
|
||||||
stats1.getTopics().entrySet().forEach(e -> stats0.incrementTopic(e.getKey(), e.getValue()));
|
stats0.incrementSize(stats1.getSize());
|
||||||
return stats0;
|
return stats0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa.util.aggregators.subset;
|
||||||
|
|
||||||
|
import org.apache.spark.sql.Encoder;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.expressions.Aggregator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.broker.model.Event;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.EventGroup;
|
||||||
|
|
||||||
|
public class EventSubsetAggregator extends Aggregator<Event, EventGroup, EventGroup> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private static final long serialVersionUID = -678071078823059805L;
|
||||||
|
|
||||||
|
private final int maxEventsForTopic;
|
||||||
|
|
||||||
|
public EventSubsetAggregator(final int maxEventsForTopic) {
|
||||||
|
this.maxEventsForTopic = maxEventsForTopic;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EventGroup zero() {
|
||||||
|
return new EventGroup();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EventGroup reduce(final EventGroup g, final Event e) {
|
||||||
|
if (g.getData().size() < maxEventsForTopic) {
|
||||||
|
g.getData().add(e);
|
||||||
|
}
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EventGroup merge(final EventGroup g0, final EventGroup g1) {
|
||||||
|
final int missing = maxEventsForTopic - g0.getData().size();
|
||||||
|
|
||||||
|
if (missing > 0) {
|
||||||
|
if (g1.getData().size() < missing) {
|
||||||
|
g0.getData().addAll(g1.getData());
|
||||||
|
} else {
|
||||||
|
g0.getData().addAll(g1.getData().subList(0, missing));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return g0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EventGroup finish(final EventGroup g) {
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Encoder<EventGroup> outputEncoder() {
|
||||||
|
return Encoders.bean(EventGroup.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Encoder<EventGroup> bufferEncoder() {
|
||||||
|
return Encoders.bean(EventGroup.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -25,13 +25,25 @@
|
||||||
<description>a black list (comma separeted, - for empty list) of datasource ids</description>
|
<description>a black list (comma separeted, - for empty list) of datasource ids</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>esIndexName</name>
|
<name>esEventIndexName</name>
|
||||||
<description>the elasticsearch index name</description>
|
<description>the elasticsearch index name for events</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esNotificationsIndexName</name>
|
||||||
|
<description>the elasticsearch index name for notifications</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>esIndexHost</name>
|
<name>esIndexHost</name>
|
||||||
<description>the elasticsearch host</description>
|
<description>the elasticsearch host</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>maxIndexedEventsForDsAndTopic</name>
|
||||||
|
<description>the max number of events for each couple (ds/topic)</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>brokerApiBaseUrl</name>
|
||||||
|
<description>the url of the broker service api</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
@ -423,16 +435,16 @@
|
||||||
<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
|
<arg>--datasourceTypeWhitelist</arg><arg>${datasourceTypeWhitelist}</arg>
|
||||||
<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
|
<arg>--datasourceIdBlacklist</arg><arg>${datasourceIdBlacklist}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="index_es"/>
|
<ok to="index_event_subset"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="index_es">
|
<action name="index_event_subset">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>IndexOnESJob</name>
|
<name>IndexEventSubsetOnESJob</name>
|
||||||
<class>eu.dnetlib.dhp.broker.oa.IndexOnESJob</class>
|
<class>eu.dnetlib.dhp.broker.oa.IndexEventSubsetJob</class>
|
||||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
@ -445,8 +457,36 @@
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||||
<arg>--index</arg><arg>${esIndexName}</arg>
|
<arg>--index</arg><arg>${esEventIndexName}</arg>
|
||||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||||
|
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
|
||||||
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="index_notifications"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="index_notifications">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>IndexNotificationsOnESJob</name>
|
||||||
|
<class>eu.dnetlib.dhp.broker.oa.IndexNotificationsJob</class>
|
||||||
|
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.dynamicAllocation.maxExecutors="8"
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||||
|
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
||||||
|
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||||
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="stats"/>
|
<ok to="stats"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "o",
|
||||||
|
"paramLongName": "workingPath",
|
||||||
|
"paramDescription": "the workinh path",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "idx",
|
||||||
|
"paramLongName": "index",
|
||||||
|
"paramDescription": "the ES index",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "es",
|
||||||
|
"paramLongName": "esHost",
|
||||||
|
"paramDescription": "the ES host",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "n",
|
||||||
|
"paramLongName": "maxEventsForTopic",
|
||||||
|
"paramDescription": "the max number of events for each couple (ds/topic)",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "broker",
|
||||||
|
"paramLongName": "brokerApiBaseUrl",
|
||||||
|
"paramDescription": "the url of the broker service api",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,26 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "o",
|
||||||
|
"paramLongName": "workingPath",
|
||||||
|
"paramDescription": "the workinh path",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "idx",
|
||||||
|
"paramLongName": "index",
|
||||||
|
"paramDescription": "the ES index",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "es",
|
||||||
|
"paramLongName": "esHost",
|
||||||
|
"paramDescription": "the ES host",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "broker",
|
||||||
|
"paramLongName": "brokerApiBaseUrl",
|
||||||
|
"paramDescription": "the url of the broker service api",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,18 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,137 @@
|
||||||
|
<workflow-app name="create broker events - partial" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>graphInputPath</name>
|
||||||
|
<description>the path where the graph is stored</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>workingPath</name>
|
||||||
|
<description>the path where the the generated data will be stored</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>datasourceIdWhitelist</name>
|
||||||
|
<value>-</value>
|
||||||
|
<description>a white list (comma separeted, - for empty list) of datasource ids</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>datasourceTypeWhitelist</name>
|
||||||
|
<value>-</value>
|
||||||
|
<description>a white list (comma separeted, - for empty list) of datasource types</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>datasourceIdBlacklist</name>
|
||||||
|
<value>-</value>
|
||||||
|
<description>a black list (comma separeted, - for empty list) of datasource ids</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esEventIndexName</name>
|
||||||
|
<description>the elasticsearch index name for events</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esNotificationsIndexName</name>
|
||||||
|
<description>the elasticsearch index name for notifications</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esIndexHost</name>
|
||||||
|
<description>the elasticsearch host</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>maxIndexedEventsForDsAndTopic</name>
|
||||||
|
<description>the max number of events for each couple (ds/topic)</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>brokerApiBaseUrl</name>
|
||||||
|
<description>the url of the broker service api</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.queuename</name>
|
||||||
|
<value>${queueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||||
|
<value>${oozieLauncherQueueName}</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
|
<start to="index_notifications"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<action name="index_notifications">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>IndexNotificationsOnESJob</name>
|
||||||
|
<class>eu.dnetlib.dhp.broker.oa.IndexNotificationsJob</class>
|
||||||
|
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.dynamicAllocation.maxExecutors="8"
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||||
|
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
||||||
|
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||||
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
|
||||||
|
</workflow-app>
|
|
@ -8,6 +8,53 @@
|
||||||
<property>
|
<property>
|
||||||
<name>workingPath</name>
|
<name>workingPath</name>
|
||||||
<description>the path where the the generated data will be stored</description>
|
<description>the path where the the generated data will be stored</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>datasourceIdWhitelist</name>
|
||||||
|
<value>-</value>
|
||||||
|
<description>a white list (comma separeted, - for empty list) of datasource ids</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>datasourceTypeWhitelist</name>
|
||||||
|
<value>-</value>
|
||||||
|
<description>a white list (comma separeted, - for empty list) of datasource types</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>datasourceIdBlacklist</name>
|
||||||
|
<value>-</value>
|
||||||
|
<description>a black list (comma separeted, - for empty list) of datasource ids</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esEventIndexName</name>
|
||||||
|
<description>the elasticsearch index name for events</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esNotificationsIndexName</name>
|
||||||
|
<description>the elasticsearch index name for notifications</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esIndexHost</name>
|
||||||
|
<description>the elasticsearch host</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>maxIndexedEventsForDsAndTopic</name>
|
||||||
|
<description>the max number of events for each couple (ds/topic)</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>brokerApiBaseUrl</name>
|
||||||
|
<description>the url of the broker service api</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>brokerDbUrl</name>
|
||||||
|
<description>the url of the broker database</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>brokerDbUser</name>
|
||||||
|
<description>the user of the broker database</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>brokerDbPassword</name>
|
||||||
|
<description>the password of the broker database</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
|
@ -64,23 +111,23 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="index_es"/>
|
<start to="stats"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
<action name="index_es">
|
<action name="stats">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>IndexOnESJob</name>
|
<name>GenerateStatsJob</name>
|
||||||
<class>eu.dnetlib.dhp.broker.oa.IndexOnESJob</class>
|
<class>eu.dnetlib.dhp.broker.oa.GenerateStatsJob</class>
|
||||||
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
<jar>dhp-broker-events-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCores}
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
--driver-memory=${sparkDriverMemory}
|
--driver-memory=${sparkDriverMemory}
|
||||||
--conf spark.dynamicAllocation.maxExecutors="8"
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
@ -88,8 +135,10 @@
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
--conf spark.sql.shuffle.partitions=3840
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||||
<arg>--index</arg><arg>${esIndexName}</arg>
|
<arg>--dbUrl</arg><arg>${brokerDbUrl}</arg>
|
||||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
<arg>--dbUser</arg><arg>${brokerDbUser}</arg>
|
||||||
|
<arg>--dbPassword</arg><arg>${brokerDbPassword}</arg>
|
||||||
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "wp",
|
||||||
|
"paramLongName": "workingPath",
|
||||||
|
"paramDescription": "the working path",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "dburl",
|
||||||
|
"paramLongName": "dbUrl",
|
||||||
|
"paramDescription": "the broker database url",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "u",
|
||||||
|
"paramLongName": "dbUser",
|
||||||
|
"paramDescription": "the broker database user",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "p",
|
||||||
|
"paramLongName": "dbPassword",
|
||||||
|
"paramDescription": "the broker database password",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "broker",
|
||||||
|
"paramLongName": "brokerApiBaseUrl",
|
||||||
|
"paramDescription": "the url of the broker service api",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,52 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class SubscriptionUtilsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testVerifyListSimilar() {
|
||||||
|
assertTrue(SubscriptionUtils.verifyListSimilar(Arrays.asList("Michele Artini", "Claudio Atzori"), "artini"));
|
||||||
|
assertFalse(SubscriptionUtils.verifyListSimilar(Arrays.asList("Michele Artini", "Claudio Atzori"), "bardi"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testVerifyListExact() {
|
||||||
|
assertTrue(SubscriptionUtils.verifyListExact(Arrays.asList("Java", "Perl"), "perl"));
|
||||||
|
assertFalse(SubscriptionUtils.verifyListExact(Arrays.asList("Java", "Perl"), "C"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testVerifySimilar() {
|
||||||
|
assertTrue(SubscriptionUtils.verifySimilar("Java Programming", "java"));
|
||||||
|
assertFalse(SubscriptionUtils.verifySimilar("Java Programming", "soap"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testVerifyFloatRange() {
|
||||||
|
assertTrue(SubscriptionUtils.verifyFloatRange(0.5f, "0.4", "0.6"));
|
||||||
|
assertFalse(SubscriptionUtils.verifyFloatRange(0.8f, "0.4", "0.6"));
|
||||||
|
assertTrue(SubscriptionUtils.verifyFloatRange(0.5f, "", ""));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testVerifyDateRange() {
|
||||||
|
final long date = 1282738478000l; // 25 August 2010
|
||||||
|
|
||||||
|
assertTrue(SubscriptionUtils.verifyDateRange(date, "2010-01-01", "2011-01-01"));
|
||||||
|
assertFalse(SubscriptionUtils.verifyDateRange(date, "2020-01-01", "2021-01-01"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testVerifyExact() {
|
||||||
|
assertTrue(SubscriptionUtils.verifyExact("Java Programming", "java programming"));
|
||||||
|
assertFalse(SubscriptionUtils.verifyExact("Java Programming", "soap programming"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -373,6 +373,7 @@ public class ResultMapper implements Serializable {
|
||||||
private static Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
|
private static Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
|
||||||
Instance instance = new Instance();
|
Instance instance = new Instance();
|
||||||
|
|
||||||
|
|
||||||
setCommonValue(i, instance);
|
setCommonValue(i, instance);
|
||||||
|
|
||||||
return instance;
|
return instance;
|
||||||
|
|
|
@ -577,7 +577,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2].trim() : null;
|
final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2].trim() : null;
|
||||||
|
|
||||||
if (issn != null || eissn != null || lissn != null) {
|
if (issn != null || eissn != null || lissn != null) {
|
||||||
return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
|
return journal(name, issn, eissn, lissn, null, null, null, null, null, null, null, info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,7 +85,7 @@ SELECT
|
||||||
dc.officialname AS collectedfromname,
|
dc.officialname AS collectedfromname,
|
||||||
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
||||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||||
d.issn || ' @@@ ' || d.eissn || ' @@@ ' || d.lissn AS journal
|
concat_ws(' @@@ ', d.issn, d.eissn, d.lissn) AS journal
|
||||||
|
|
||||||
FROM dsm_datasources d
|
FROM dsm_datasources d
|
||||||
|
|
||||||
|
|
|
@ -73,12 +73,16 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
final Datasource ds = (Datasource) list.get(0);
|
final Datasource ds = (Datasource) list.get(0);
|
||||||
assertValidId(ds.getId());
|
assertValidId(ds.getId());
|
||||||
assertValidId(ds.getCollectedfrom().get(0).getKey());
|
assertValidId(ds.getCollectedfrom().get(0).getKey());
|
||||||
assertEquals(ds.getOfficialname().getValue(), getValueAsString("officialname", fields));
|
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue());
|
||||||
assertEquals(ds.getEnglishname().getValue(), getValueAsString("englishname", fields));
|
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue());
|
||||||
assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields));
|
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
|
||||||
assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
|
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue());
|
||||||
assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields));
|
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue());
|
||||||
assertEquals(ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
assertEquals(getValueAsString("collectedfromname", fields), ds.getCollectedfrom().get(0).getValue());
|
||||||
|
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
|
||||||
|
assertEquals("2579-5449", ds.getJournal().getIssnPrinted());
|
||||||
|
assertEquals("2597-6540", ds.getJournal().getIssnOnline());
|
||||||
|
assertEquals(null, ds.getJournal().getIssnLinking());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -92,9 +96,11 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
final Project p = (Project) list.get(0);
|
final Project p = (Project) list.get(0);
|
||||||
assertValidId(p.getId());
|
assertValidId(p.getId());
|
||||||
assertValidId(p.getCollectedfrom().get(0).getKey());
|
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||||
assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields));
|
assertEquals(getValueAsString("acronym", fields), p.getAcronym().getValue());
|
||||||
assertEquals(p.getTitle().getValue(), getValueAsString("title", fields));
|
assertEquals(getValueAsString("title", fields), p.getTitle().getValue());
|
||||||
assertEquals(p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
assertEquals(getValueAsString("collectedfromname", fields), p.getCollectedfrom().get(0).getValue());
|
||||||
|
assertEquals(getValueAsFloat("fundedamount", fields), p.getFundedamount());
|
||||||
|
assertEquals(getValueAsFloat("totalcost", fields), p.getTotalcost());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -110,14 +116,14 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
final Organization o = (Organization) list.get(0);
|
final Organization o = (Organization) list.get(0);
|
||||||
assertValidId(o.getId());
|
assertValidId(o.getId());
|
||||||
assertValidId(o.getCollectedfrom().get(0).getKey());
|
assertValidId(o.getCollectedfrom().get(0).getKey());
|
||||||
assertEquals(o.getLegalshortname().getValue(), getValueAsString("legalshortname", fields));
|
assertEquals(getValueAsString("legalshortname", fields), o.getLegalshortname().getValue());
|
||||||
assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields));
|
assertEquals(getValueAsString("legalname", fields), o.getLegalname().getValue());
|
||||||
assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
|
assertEquals(getValueAsString("websiteurl", fields), o.getWebsiteurl().getValue());
|
||||||
assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]);
|
assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassid());
|
||||||
assertEquals(o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[0]);
|
assertEquals(getValueAsString("country", fields).split("@@@")[0], o.getCountry().getClassname());
|
||||||
assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[1]);
|
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid());
|
||||||
assertEquals(o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[1]);
|
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename());
|
||||||
assertEquals(o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -322,12 +328,20 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getValueAsString(final String name, final List<TypedField> fields) {
|
private String getValueAsString(final String name, final List<TypedField> fields) {
|
||||||
|
return getValueAs(name, fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Float getValueAsFloat(final String name, final List<TypedField> fields) {
|
||||||
|
return new Float(getValueAs(name, fields).toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T getValueAs(final String name, final List<TypedField> fields) {
|
||||||
return fields
|
return fields
|
||||||
.stream()
|
.stream()
|
||||||
.filter(f -> f.getField().equals(name))
|
.filter(f -> f.getField().equals(name))
|
||||||
.map(TypedField::getValue)
|
.map(TypedField::getValue)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(o -> o.toString())
|
.map(o -> (T) o)
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.get();
|
.get();
|
||||||
}
|
}
|
||||||
|
|
|
@ -142,12 +142,12 @@
|
||||||
{
|
{
|
||||||
"field": "totalcost",
|
"field": "totalcost",
|
||||||
"type": "double",
|
"type": "double",
|
||||||
"value": null
|
"value": 157846
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "fundedamount",
|
"field": "fundedamount",
|
||||||
"type": "double",
|
"type": "double",
|
||||||
"value": null
|
"value": 157846
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "collectedfromid",
|
"field": "collectedfromid",
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -663,7 +663,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dnet.openaire.broker.common>3.1.0</dnet.openaire.broker.common>
|
<dnet.openaire.broker.common>3.1.1</dnet.openaire.broker.common>
|
||||||
<solr.version>7.5.0</solr.version>
|
<solr.version>7.5.0</solr.version>
|
||||||
<okhttp.version>4.7.2</okhttp.version>
|
<okhttp.version>4.7.2</okhttp.version>
|
||||||
<common.compress.version>1.1</common.compress.version>
|
<common.compress.version>1.1</common.compress.version>
|
||||||
|
|
Loading…
Reference in New Issue