workingDir and outputDir

Michele Artini 2020-12-10 14:47:51 +01:00
parent 2e7df07328
commit 933b4c1ada
7 changed files with 45 additions and 28 deletions

eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java

@@ -30,8 +30,9 @@ public class CheckDuplictedIdsJob {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
-				.toString(CheckDuplictedIdsJob.class
-					.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/check_duplicates.json")));
+				.toString(
+					CheckDuplictedIdsJob.class
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/check_duplicates.json")));
 		parser.parseArgument(args);
 
 		final SparkConf conf = new SparkConf();
@@ -59,7 +60,8 @@ public class CheckDuplictedIdsJob {
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
-			.json(countPath);;
+			.json(countPath);
+		;
 	}
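
All seven jobs touched by this commit share the bootstrap reformatted above: the CLI parameter definitions are loaded from a JSON resource on the classpath and handed to ArgumentApplicationParser. A minimal sketch of that pattern, assuming the parser lives in the usual eu.dnetlib.dhp.application package; the job class and resource path below are hypothetical placeholders:

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class ExampleBrokerJob {

	public static void main(final String[] args) throws Exception {
		// Read the parameter definitions bundled as a classpath resource
		// (hypothetical path) and let the parser validate the CLI arguments.
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					ExampleBrokerJob.class
						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/example_params.json")));
		parser.parseArgument(args);

		// Individual parameters such as the workingDir and outputDir values this
		// commit introduces are then read by name (in other dnet-hadoop jobs via
		// parser.get("..."), an accessor assumed here).
	}
}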

eu/dnetlib/dhp/broker/oa/GenerateEventsJob.java

@@ -33,8 +33,9 @@ public class GenerateEventsJob {
 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
-				.toString(GenerateEventsJob.class
-					.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_events.json")));
+				.toString(
+					GenerateEventsJob.class
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/generate_events.json")));
 		parser.parseArgument(args);
 
 		final Boolean isSparkSessionManaged = Optional
@@ -72,8 +73,10 @@ public class GenerateEventsJob {
 			.readPath(spark, workingDir + "/duplicates", ResultGroup.class);
 
 		final Dataset<Event> dataset = groups
-			.map(g -> EventFinder
-				.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, accumulators), Encoders
-				.bean(EventGroup.class))
+			.map(
+				g -> EventFinder
+					.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, accumulators),
+				Encoders
+					.bean(EventGroup.class))
 			.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
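
The wrapped map call above also shows the recurring Spark idiom in these jobs: every typed transformation takes an explicit Encoder, because Spark cannot infer the target bean type from a Java lambda. A self-contained sketch of the same map-with-Encoders.bean pattern, using hypothetical bean and job names:

import java.io.Serializable;
import java.util.Arrays;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class BeanEncoderExample {

	// Encoders.bean requires a public JavaBean with a no-arg constructor
	// and getter/setter pairs, like Event or EventGroup in the real jobs.
	public static class Wrapped implements Serializable {

		private String value;

		public String getValue() {
			return value;
		}

		public void setValue(final String value) {
			this.value = value;
		}
	}

	public static void main(final String[] args) {
		final SparkSession spark = SparkSession
			.builder()
			.master("local[*]")
			.appName("bean-encoder-example")
			.getOrCreate();

		final Dataset<String> source = spark.createDataset(Arrays.asList("a", "b"), Encoders.STRING());

		// As in GenerateEventsJob: the second argument tells Spark how to
		// encode the values produced by the lambda.
		final Dataset<Wrapped> wrapped = source.map((MapFunction<String, Wrapped>) s -> {
			final Wrapped w = new Wrapped();
			w.setValue(s);
			return w;
		}, Encoders.bean(Wrapped.class));

		wrapped.show();
		spark.stop();
	}
}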

eu/dnetlib/dhp/broker/oa/GenerateStatsJob.java

@@ -33,8 +33,9 @@ public class GenerateStatsJob {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
-				.toString(GenerateStatsJob.class
-					.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/stats_params.json")));
+				.toString(
+					GenerateStatsJob.class
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/stats_params.json")));
 		parser.parseArgument(args);
 
 		final Boolean isSparkSessionManaged = Optional

eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java

@@ -39,8 +39,9 @@ public class IndexEventSubsetJob {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
-				.toString(IndexEventSubsetJob.class
-					.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_event_subset.json")));
+				.toString(
+					IndexEventSubsetJob.class
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_event_subset.json")));
 		parser.parseArgument(args);
 
 		final SparkConf conf = new SparkConf();

eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java

@@ -47,8 +47,9 @@ public class IndexNotificationsJob {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
-				.toString(IndexNotificationsJob.class
-					.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json")));
+				.toString(
+					IndexNotificationsJob.class
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json")));
 		parser.parseArgument(args);
 
 		final SparkConf conf = new SparkConf();
@@ -116,7 +117,8 @@ public class IndexNotificationsJob {
 		final long date) {
 		final List<Notification> list = subscriptions
 			.stream()
-			.filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
+			.filter(
+				s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic()))
 			.filter(s -> verifyConditions(e.getMap(), s.conditionsAsMap()))
 			.map(s -> generateNotification(s, e, date))
 			.collect(Collectors.toList());
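
The long filter line wrapped above encodes the subscription-matching rule: a blank or "*" subscription topic matches every event, otherwise the topics must match exactly. A standalone sketch of that predicate, with hypothetical class and method names and illustrative topic strings:

import org.apache.commons.lang3.StringUtils;

public class TopicMatchExample {

	// Blank or wildcard subscription topics match any event topic;
	// otherwise an exact match is required.
	public static boolean topicMatches(final String subscriptionTopic, final String eventTopic) {
		return StringUtils.isBlank(subscriptionTopic)
			|| subscriptionTopic.equals("*")
			|| subscriptionTopic.equals(eventTopic);
	}

	public static void main(final String[] args) {
		System.out.println(topicMatches(null, "ENRICH/MISSING/PID")); // true
		System.out.println(topicMatches("*", "ENRICH/MISSING/PID")); // true
		System.out.println(topicMatches("ENRICH/MISSING/PID", "ENRICH/MISSING/PID")); // true
		System.out.println(topicMatches("ENRICH/MISSING/ABSTRACT", "ENRICH/MISSING/PID")); // false
	}
}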
@@ -147,15 +149,18 @@ public class IndexNotificationsJob {
 		if (conditions.containsKey("trust")
 			&& !SubscriptionUtils
-				.verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) {
+				.verifyFloatRange(
+					map.getTrust(), conditions.get("trust").get(0).getValue(),
+					conditions.get("trust").get(0).getOtherValue())) {
 			return false;
 		}
 
 		if (conditions.containsKey("targetDateofacceptance") && !conditions
 			.get("targetDateofacceptance")
 			.stream()
-			.anyMatch(c -> SubscriptionUtils
-				.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
+			.anyMatch(
+				c -> SubscriptionUtils
+					.verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) {
 			return false;
 		}
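
SubscriptionUtils.verifyFloatRange and verifyDateRange are project utilities whose bodies lie outside this diff; the reformatted calls only reveal their shape (value, lower bound, upper bound). The following is a hypothetical reimplementation of the float variant, purely to illustrate the assumed range semantics, not the project's actual code:

public class RangeCheckExample {

	// Assumed semantics of SubscriptionUtils.verifyFloatRange: the event's
	// trust value must fall inside the [from, to] interval declared by the
	// subscription condition. Illustrative guess, not the real implementation.
	public static boolean verifyFloatRange(final float value, final String from, final String to) {
		return value >= Float.parseFloat(from) && value <= Float.parseFloat(to);
	}

	public static void main(final String[] args) {
		System.out.println(verifyFloatRange(0.8f, "0.5", "1.0")); // true
		System.out.println(verifyFloatRange(0.3f, "0.5", "1.0")); // false
	}
}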

eu/dnetlib/dhp/broker/oa/IndexOnESJob.java

@@ -29,8 +29,9 @@ public class IndexOnESJob {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
-				.toString(IndexOnESJob.class
-					.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_es.json")));
+				.toString(
+					IndexOnESJob.class
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_es.json")));
 		parser.parseArgument(args);
 
 		final SparkConf conf = new SparkConf();

eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java

@@ -42,8 +42,9 @@ public class PartitionEventsByDsIdJob {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
-				.toString(PartitionEventsByDsIdJob.class
-					.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json")));
+				.toString(
+					PartitionEventsByDsIdJob.class
+						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json")));
 		parser.parseArgument(args);
 
 		final Boolean isSparkSessionManaged = Optional
@@ -66,12 +67,13 @@ public class PartitionEventsByDsIdJob {
 		final Set<String> validOpendoarIds = new HashSet<>();
 		if (!opendoarIds.trim().equals("-")) {
 			validOpendoarIds
-				.addAll(Arrays
-					.stream(opendoarIds.split(","))
-					.map(String::trim)
-					.filter(StringUtils::isNotBlank)
-					.map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s))
-					.collect(Collectors.toSet()));
+				.addAll(
+					Arrays
+						.stream(opendoarIds.split(","))
+						.map(String::trim)
+						.filter(StringUtils::isNotBlank)
+						.map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s))
+						.collect(Collectors.toSet()));
 		}
 
 		log.info("validOpendoarIds: {}", validOpendoarIds);
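
The wrapped stream above builds the OpenDOAR whitelist: the sentinel "-" disables filtering, otherwise each comma-separated id is trimmed, blanks are dropped, and every id becomes a datasource identifier by prefixing its MD5 hash. A runnable sketch; the OPENDOAR_NSPREFIX value below is an assumption, since the constant is defined outside this diff:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;

public class OpendoarWhitelistExample {

	// Assumed value; the real constant is declared elsewhere in the class.
	private static final String OPENDOAR_NSPREFIX = "opendoar____::";

	public static void main(final String[] args) {
		final String opendoarIds = "100, 200 ,, 300";

		final Set<String> validOpendoarIds = new HashSet<>();
		if (!opendoarIds.trim().equals("-")) {
			validOpendoarIds
				.addAll(
					Arrays
						.stream(opendoarIds.split(","))
						.map(String::trim)
						.filter(StringUtils::isNotBlank)
						.map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s))
						.collect(Collectors.toSet()));
		}

		System.out.println(validOpendoarIds);
	}
}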
@@ -82,7 +84,9 @@ public class PartitionEventsByDsIdJob {
 			.filter((FilterFunction<Event>) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId()))
 			.filter((FilterFunction<Event>) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX))
 			.filter((FilterFunction<Event>) e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId()))
-			.map((MapFunction<Event, ShortEventMessageWithGroupId>) e -> messageFromNotification(e), Encoders.bean(ShortEventMessageWithGroupId.class))
+			.map(
+				(MapFunction<Event, ShortEventMessageWithGroupId>) e -> messageFromNotification(e),
+				Encoders.bean(ShortEventMessageWithGroupId.class))
 			.coalesce(1)
 			.write()
 			.partitionBy("group")
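
The diff context ends at .partitionBy("group"), so the tail of the chain is not shown here; by analogy with the write in CheckDuplictedIdsJob it presumably finishes with an overwrite mode, gzip compression and a JSON sink. A self-contained sketch of such a partitioned write, with the bean, the session and the output path as placeholders:

import java.io.Serializable;
import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class PartitionedWriteExample {

	// Minimal stand-in for ShortEventMessageWithGroupId: just the column
	// used for partitioning plus one payload column.
	public static class Message implements Serializable {

		private String group;
		private String topic;

		public String getGroup() {
			return group;
		}

		public void setGroup(final String group) {
			this.group = group;
		}

		public String getTopic() {
			return topic;
		}

		public void setTopic(final String topic) {
			this.topic = topic;
		}
	}

	public static void main(final String[] args) {
		final SparkSession spark = SparkSession
			.builder()
			.master("local[*]")
			.appName("partitioned-write-example")
			.getOrCreate();

		final Message m1 = new Message();
		m1.setGroup("ds1");
		m1.setTopic("ENRICH/MISSING/PID");
		final Message m2 = new Message();
		m2.setGroup("ds2");
		m2.setTopic("ENRICH/MISSING/ABSTRACT");

		final Dataset<Message> events = spark.createDataset(Arrays.asList(m1, m2), Encoders.bean(Message.class));

		// One output directory per "group" value, each holding a single
		// gzip-compressed JSON file; the path is a placeholder.
		events
			.coalesce(1)
			.write()
			.partitionBy("group")
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json("/tmp/partitioned-events");

		spark.stop();
	}
}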