From e34e7d672892513b929aff891f020f923393644d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 8 Jun 2020 08:32:22 +0200 Subject: [PATCH] merge DOIBoost --- .../dhp/broker/model/EventFactory.java | 17 +++++-- .../broker/oa/GenerateEventsApplication.java | 45 +++++++++--------- .../relatedProjects/EnrichMissingProject.java | 3 +- .../relatedProjects/EnrichMoreProject.java | 6 ++- .../EnrichMissingSoftware.java | 3 +- .../relatedSoftware/EnrichMoreSoftware.java | 6 ++- .../simple/EnrichMissingOpenAccess.java | 4 +- .../dhp/broker/oa/util/BrokerConstants.java | 4 +- .../dhp/broker/oa/util/ConversionUtils.java | 47 +++++++++++++++---- .../dhp/broker/oa/util/UpdateInfo.java | 16 ++++++- 10 files changed, 102 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index fcecc8369..df33fae0d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -36,8 +36,9 @@ public class EventFactory { final Map map = createMapFromResult(updateInfo); - final String eventId = - calculateEventId(updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0), updateInfo.getHighlightValueAsString()); + final String eventId = calculateEventId( + updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0), + updateInfo.getHighlightValueAsString()); res.setEventId(eventId); res.setProducerId(PRODUCER_ID); @@ -80,13 +81,17 @@ public class EventFactory { final List subjects = target.getSubject(); if (subjects.size() > 0) { map - .put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); + .put( + "target_publication_subject_list", + subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); } final List authors = target.getAuthor(); if (authors.size() > 0) { map - .put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList())); + .put( + "target_publication_author_list", + authors.stream().map(Author::getFullname).collect(Collectors.toList())); } // PROVENANCE INFO @@ -113,7 +118,9 @@ public class EventFactory { } private static long parseDateTolong(final String date) { - if (StringUtils.isBlank(date)) { return -1; } + if (StringUtils.isBlank(date)) { + return -1; + } try { return DateUtils.parseDate(date, DATE_PATTERNS).getTime(); } catch (final ParseException e) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index fede6f8bf..a38e760c5 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -87,32 +87,25 @@ public class GenerateEventsApplication { private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = - new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = - new EnrichMissingPublicationIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = - new EnrichMissingPublicationIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); - private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = - new EnrichMissingDatasetIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = - new EnrichMissingDatasetIsReferencedBy(); - private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = - new EnrichMissingDatasetReferences(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = - new EnrichMissingDatasetIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = - new EnrichMissingDatasetIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -156,8 +149,9 @@ public class GenerateEventsApplication { final String graphPath, final Class resultClazz) { - final Dataset results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset results = readPath( + spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + .filter(r -> r.getDataInfo().getDeletedbyinference()); final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); @@ -196,15 +190,18 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } - private static JavaRDD generateRelationEvents(final SparkSession spark, + private static JavaRDD generateRelationEvents( + final SparkSession spark, final String graphPath, final Class sourceClass, final Class targetClass) { - final Dataset sources = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset sources = readPath( + spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass) + .filter(r -> r.getDataInfo().getDeletedbyinference()); - final Dataset targets = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass); + final Dataset targets = readPath( + spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass); final Dataset mergedRels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index 3ce29f864..ed5e71d52 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -28,7 +28,8 @@ public class EnrichMissingProject if (source.getRight().isEmpty()) { return Arrays.asList(); } else { - return target.getRight() + return target + .getRight() .stream() .map(ConversionUtils::oafProjectToBrokerProject) .map(p -> generateUpdateInfo(p, source, target)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 70ab5f71c..753166a82 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -24,12 +24,14 @@ public class EnrichMoreProject extends UpdateMatcher> protected List> findUpdates(final Pair> source, final Pair> target) { - final Set existingProjects = source.getRight() + final Set existingProjects = source + .getRight() .stream() .map(Project::getId) .collect(Collectors.toSet()); - return target.getRight() + return target + .getRight() .stream() .filter(p -> !existingProjects.contains(p.getId())) .map(ConversionUtils::oafProjectToBrokerProject) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java index d0d0f4083..fd9091dc1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java @@ -29,7 +29,8 @@ public class EnrichMissingSoftware if (source.getRight().isEmpty()) { return Arrays.asList(); } else { - return target.getRight() + return target + .getRight() .stream() .map(ConversionUtils::oafSoftwareToBrokerSoftware) .map(p -> generateUpdateInfo(p, source, target)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java index 455f08468..cb649dfff 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java @@ -26,12 +26,14 @@ public class EnrichMoreSoftware final Pair> source, final Pair> target) { - final Set existingSoftwares = source.getRight() + final Set existingSoftwares = source + .getRight() .stream() .map(Software::getId) .collect(Collectors.toSet()); - return target.getRight() + return target + .getRight() .stream() .filter(p -> !existingSoftwares.contains(p.getId())) .map(ConversionUtils::oafSoftwareToBrokerSoftware) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java index 524846d06..30638cefb 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java @@ -28,7 +28,9 @@ public class EnrichMissingOpenAccess extends UpdateMatcher { .filter(right -> right.equals(BrokerConstants.OPEN_ACCESS)) .count(); - if (count > 0) { return Arrays.asList(); } + if (count > 0) { + return Arrays.asList(); + } return source .getInstance() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 06a46b7da..f4da59518 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -15,7 +15,7 @@ public class BrokerConstants { public static final String OPEN_ACCESS = "OPEN"; public static final String IS_MERGED_IN_CLASS = "isMergedIn"; - public static final List> RESULT_CLASSES = - Arrays.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class); + public static final List> RESULT_CLASSES = Arrays + .asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index d18c46e56..2b39115b1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -56,7 +56,13 @@ public class ConversionUtils { .setOriginalId(d.getOriginalId().get(0)) .setTitles(structPropList(d.getTitle())) .setPids(d.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList())) - .setInstances(d.getInstance().stream().map(ConversionUtils::oafInstanceToBrokerInstances).flatMap(List::stream).collect(Collectors.toList())) + .setInstances( + d + .getInstance() + .stream() + .map(ConversionUtils::oafInstanceToBrokerInstances) + .flatMap(List::stream) + .collect(Collectors.toList())) .setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList())) : null; } @@ -74,11 +80,23 @@ public class ConversionUtils { .setPublisher(fieldValue(result.getPublisher())) .setEmbargoenddate(fieldValue(result.getEmbargoenddate())) .setContributor(fieldList(result.getContributor())) - .setJournal(result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null) + .setJournal( + result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null) .setCollectedFrom(result.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList())) .setPids(result.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList())) - .setInstances(result.getInstance().stream().map(ConversionUtils::oafInstanceToBrokerInstances).flatMap(List::stream).collect(Collectors.toList())) - .setExternalReferences(result.getExternalReference().stream().map(ConversionUtils::oafExtRefToBrokerExtRef).collect(Collectors.toList())) + .setInstances( + result + .getInstance() + .stream() + .map(ConversionUtils::oafInstanceToBrokerInstances) + .flatMap(List::stream) + .collect(Collectors.toList())) + .setExternalReferences( + result + .getExternalReference() + .stream() + .map(ConversionUtils::oafExtRefToBrokerExtRef) + .collect(Collectors.toList())) : null; } @@ -100,7 +118,9 @@ public class ConversionUtils { } public static final eu.dnetlib.broker.objects.Project oafProjectToBrokerProject(final Project p) { - if (p == null) { return null; } + if (p == null) { + return null; + } final eu.dnetlib.broker.objects.Project res = new eu.dnetlib.broker.objects.Project() .setTitle(fieldValue(p.getTitle())) @@ -136,20 +156,29 @@ public class ConversionUtils { } private static String fieldValue(final List> fl) { - return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) : null; + return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) + : null; } private static String structPropValue(final List props) { - return props != null ? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) : null; + return props != null + ? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) + : null; } private static List fieldList(final List> fl) { - return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).collect(Collectors.toList()) + return fl != null + ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).collect(Collectors.toList()) : new ArrayList<>(); } private static List structPropList(final List props) { - return props != null ? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).collect(Collectors.toList()) + return props != null + ? props + .stream() + .map(StructuredProperty::getValue) + .filter(StringUtils::isNotBlank) + .collect(Collectors.toList()) : new ArrayList<>(); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 07b171080..04b426a1c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -83,8 +83,20 @@ public final class UpdateInfo { compileHighlight.accept(hl, getHighlightValue()); final String provId = getSource().getOriginalId().stream().findFirst().orElse(null); - final String provRepo = getSource().getCollectedfrom().stream().map(KeyValue::getValue).findFirst().orElse(null); - final String provUrl = getSource().getInstance().stream().map(Instance::getUrl).flatMap(List::stream).findFirst().orElse(null);; + final String provRepo = getSource() + .getCollectedfrom() + .stream() + .map(KeyValue::getValue) + .findFirst() + .orElse(null); + final String provUrl = getSource() + .getInstance() + .stream() + .map(Instance::getUrl) + .flatMap(List::stream) + .findFirst() + .orElse(null); + ; final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);