forked from D-Net/dnet-hadoop
workingDir and outputDir
This commit is contained in:
parent
10f3f7eca7
commit
467aa77279
|
@ -30,8 +30,9 @@ public class CheckDuplictedIdsJob {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(CheckDuplictedIdsJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/check_duplicates.json")));
|
CheckDuplictedIdsJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/check_duplicates.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
|
@ -59,7 +60,8 @@ public class CheckDuplictedIdsJob {
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(countPath);;
|
.json(countPath);
|
||||||
|
;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,8 +33,9 @@ public class GenerateEventsJob {
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(GenerateEventsJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_events.json")));
|
GenerateEventsJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_events.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final Boolean isSparkSessionManaged = Optional
|
final Boolean isSparkSessionManaged = Optional
|
||||||
|
@ -72,8 +73,10 @@ public class GenerateEventsJob {
|
||||||
.readPath(spark, workingDir + "/duplicates", ResultGroup.class);
|
.readPath(spark, workingDir + "/duplicates", ResultGroup.class);
|
||||||
|
|
||||||
final Dataset<Event> dataset = groups
|
final Dataset<Event> dataset = groups
|
||||||
.map(g -> EventFinder
|
.map(
|
||||||
.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, accumulators), Encoders
|
g -> EventFinder
|
||||||
|
.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, accumulators),
|
||||||
|
Encoders
|
||||||
.bean(EventGroup.class))
|
.bean(EventGroup.class))
|
||||||
.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
|
.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
|
||||||
|
|
||||||
|
|
|
@ -33,8 +33,9 @@ public class GenerateStatsJob {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(GenerateStatsJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/stats_params.json")));
|
GenerateStatsJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/stats_params.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final Boolean isSparkSessionManaged = Optional
|
final Boolean isSparkSessionManaged = Optional
|
||||||
|
|
|
@ -39,8 +39,9 @@ public class IndexEventSubsetJob {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(IndexEventSubsetJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_event_subset.json")));
|
IndexEventSubsetJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_event_subset.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
|
|
|
@ -47,8 +47,9 @@ public class IndexNotificationsJob {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(IndexNotificationsJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json")));
|
IndexNotificationsJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
|
@ -116,7 +117,8 @@ public class IndexNotificationsJob {
|
||||||
final long date) {
|
final long date) {
|
||||||
final List<Notification> list = subscriptions
|
final List<Notification> list = subscriptions
|
||||||
.stream()
|
.stream()
|
||||||
.filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
|
.filter(
|
||||||
|
s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
|
||||||
.filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap()))
|
.filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap()))
|
||||||
.map(s -> generateNotification(s, e, date))
|
.map(s -> generateNotification(s, e, date))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -147,15 +149,18 @@ public class IndexNotificationsJob {
|
||||||
|
|
||||||
if (conditions.containsKey("trust")
|
if (conditions.containsKey("trust")
|
||||||
&& !SubscriptionUtils
|
&& !SubscriptionUtils
|
||||||
.verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) {
|
.verifyFloatRange(
|
||||||
|
map.getTrust(), conditions.get("trust").get(0).getValue(),
|
||||||
|
conditions.get("trust").get(0).getOtherValue())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (conditions.containsKey("targetDateofacceptance") && !conditions
|
if (conditions.containsKey("targetDateofacceptance") && !conditions
|
||||||
.get("targetDateofacceptance")
|
.get("targetDateofacceptance")
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(c -> SubscriptionUtils
|
.anyMatch(
|
||||||
.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
|
c -> SubscriptionUtils
|
||||||
|
.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,8 +29,9 @@ public class IndexOnESJob {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(IndexOnESJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_es.json")));
|
IndexOnESJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_es.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
|
|
|
@ -42,8 +42,9 @@ public class PartitionEventsByDsIdJob {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(PartitionEventsByDsIdJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json")));
|
PartitionEventsByDsIdJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final Boolean isSparkSessionManaged = Optional
|
final Boolean isSparkSessionManaged = Optional
|
||||||
|
@ -66,12 +67,13 @@ public class PartitionEventsByDsIdJob {
|
||||||
final Set<String> validOpendoarIds = new HashSet<>();
|
final Set<String> validOpendoarIds = new HashSet<>();
|
||||||
if (!opendoarIds.trim().equals("-")) {
|
if (!opendoarIds.trim().equals("-")) {
|
||||||
validOpendoarIds
|
validOpendoarIds
|
||||||
.addAll(Arrays
|
.addAll(
|
||||||
.stream(opendoarIds.split(","))
|
Arrays
|
||||||
.map(String::trim)
|
.stream(opendoarIds.split(","))
|
||||||
.filter(StringUtils::isNotBlank)
|
.map(String::trim)
|
||||||
.map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s))
|
.filter(StringUtils::isNotBlank)
|
||||||
.collect(Collectors.toSet()));
|
.map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s))
|
||||||
|
.collect(Collectors.toSet()));
|
||||||
}
|
}
|
||||||
log.info("validOpendoarIds: {}", validOpendoarIds);
|
log.info("validOpendoarIds: {}", validOpendoarIds);
|
||||||
|
|
||||||
|
@ -82,7 +84,9 @@ public class PartitionEventsByDsIdJob {
|
||||||
.filter((FilterFunction<Event>) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
|
.filter((FilterFunction<Event>) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
|
||||||
.filter((FilterFunction<Event>) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX))
|
.filter((FilterFunction<Event>) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX))
|
||||||
.filter((FilterFunction<Event>) e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId()))
|
.filter((FilterFunction<Event>) e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId()))
|
||||||
.map((MapFunction<Event, ShortEventMessageWithGroupId>) e -> messageFromNotification(e), Encoders.bean(ShortEventMessageWithGroupId.class))
|
.map(
|
||||||
|
(MapFunction<Event, ShortEventMessageWithGroupId>) e -> messageFromNotification(e),
|
||||||
|
Encoders.bean(ShortEventMessageWithGroupId.class))
|
||||||
.coalesce(1)
|
.coalesce(1)
|
||||||
.write()
|
.write()
|
||||||
.partitionBy("group")
|
.partitionBy("group")
|
||||||
|
|
Loading…
Reference in New Issue