From d1cadc77c90bd3e6eca9b351b9dc4620cb915c2c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 24 Sep 2024 10:57:20 +0200 Subject: [PATCH] [graph provision] person serialisation, limit the number of authorships and coauthorships before expanding the payloads --- .../dhp/schema/oaf/utils/ModelHardLimits.java | 6 +++--- .../dhp/oa/provision/PayloadConverterJob.java | 12 +++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java index e4b184fa1..68f60d4d9 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java @@ -25,11 +25,11 @@ public class ModelHardLimits { public static final int MAX_ABSTRACT_LENGTH = 150000; public static final int MAX_RELATED_ABSTRACT_LENGTH = 500; public static final int MAX_INSTANCES = 10; - public static final Map<String, Integer> MAX_RELATIONS_BY_RELCLASS = Maps.newHashMap(); + public static final Map<String, Long> MAX_RELATIONS_BY_RELCLASS = Maps.newHashMap(); static { - MAX_RELATIONS_BY_RELCLASS.put(ModelConstants.PERSON_PERSON_HASCOAUTHORED, 500); - MAX_RELATIONS_BY_RELCLASS.put(ModelConstants.RESULT_PERSON_HASAUTHORED, 500); + MAX_RELATIONS_BY_RELCLASS.put(ModelConstants.PERSON_PERSON_HASCOAUTHORED, 500L); + MAX_RELATIONS_BY_RELCLASS.put(ModelConstants.RESULT_PERSON_HASAUTHORED, 500L); } public static String getCollectionName(String format) { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java index cb2d2e799..58838d047 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java +++ 
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java @@ -149,8 +149,8 @@ public class PayloadConverterJob { } /** - This function iterates through the RelatedEntityWrapper(s) associated to the JoinedEntity and rules out - those exceeding the maximum allowed frequency defined in eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits#MAX_RELATIONS_BY_RELCLASS + * Iterates through the RelatedEntityWrapper(s) associated to the JoinedEntity, counting them per relation class, and rules out + * those exceeding the maximum allowed frequency defined in eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits#MAX_RELATIONS_BY_RELCLASS; relation classes with no configured maximum are never pruned. */ private static JoinedEntity pruneRelatedEntities(JoinedEntity je) { Map<String, Long> freqs = Maps.newHashMap(); @@ -159,9 +159,11 @@ public class PayloadConverterJob { if (je.getLinks() != null) { je.getLinks().forEach(link -> { final String relClass = link.getRelation().getRelClass(); - Long count = freqs.putIfAbsent(relClass, 0L); - if (Objects.isNull(count) || (MAX_RELATIONS_BY_RELCLASS.containsKey(relClass) - && count <= MAX_RELATIONS_BY_RELCLASS.get(relClass))) { + + final Long count = freqs.computeIfAbsent(relClass, k -> 0L); /* seed the counter with 0 on first sight of a relClass, so the increment below never unboxes a missing (null) entry */ + final Long max = MAX_RELATIONS_BY_RELCLASS.getOrDefault(relClass, Long.MAX_VALUE); + + if (count <= max) { rew.add(link); freqs.put(relClass, freqs.get(relClass) + 1); }