diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
index 9153a6476..ea85ad65b 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@@ -472,23 +472,12 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 							.stream()
 							.filter(Objects::nonNull)
 							.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
-							.filter(
-								sp -> {
-									final String title = sp
-										.getValue()
-										.toLowerCase();
-									final String decoded = Unidecode.decode(title);
-
-									if (StringUtils.contains(decoded, TITLE_TEST)) {
-										return decoded
-											.replaceAll(TITLE_FILTER_REGEX, "")
-											.length() > TITLE_FILTER_RESIDUAL_LENGTH;
-									}
-									return !decoded
-										.replaceAll("\\W|\\d", "")
-										.isEmpty();
-								})
+							.filter(GraphCleaningFunctions::checkTestTitle)
 							.map(GraphCleaningFunctions::cleanValue)
+							.sorted(
+								Comparator.comparingInt((StructuredProperty t) -> t.getValue().length()).reversed())
+							.limit(ModelHardLimits.MAX_TITLES)
+							.map(GraphCleaningFunctions::shortenTitles)
 							.collect(Collectors.toList()));
 			}
 			if (Objects.nonNull(r.getFormat())) {
@@ -815,6 +804,53 @@ public class GraphCleaningFunctions extends CleaningFunctions {
 		return value;
 	}
 
+	/**
+	 * Truncates the title value to at most {@link ModelHardLimits#MAX_TITLE_LENGTH} UTF-16 code units.
+	 * The cut point is moved back by one when it would fall between the two halves of a surrogate pair,
+	 * so that the shortened value is always a well-formed string.
+	 *
+	 * @param title the title to shorten, its value is expected to be not null
+	 * @return the same instance, with its value shortened in place when it exceeded the limit
+	 */
+	private static StructuredProperty shortenTitles(StructuredProperty title) {
+		final String value = title.getValue();
+		if (value.length() > ModelHardLimits.MAX_TITLE_LENGTH) {
+			int end = ModelHardLimits.MAX_TITLE_LENGTH;
+			if (Character.isHighSurrogate(value.charAt(end - 1))) {
+				end--;
+			}
+			title.setValue(StringUtils.left(value, end));
+		}
+		return title;
+	}
+
+	/**
+	 * Tells whether a title carries enough content to be kept.
+	 * The value is lowercased and passed through {@code Unidecode.decode} before being checked:
+	 * when the result contains {@code TITLE_TEST}, what remains after removing the matches of
+	 * {@code TITLE_FILTER_REGEX} must be longer than {@code TITLE_FILTER_RESIDUAL_LENGTH};
+	 * otherwise the result must contain at least one word character that is not a digit.
+	 *
+	 * @param sp the title to check, its value is expected to be not null
+	 * @return true when the title should be kept, false when it should be discarded
+	 */
+	private static boolean checkTestTitle(StructuredProperty sp) {
+		// Locale.ROOT keeps the lowercasing independent from the default locale of the JVM running the job
+		final String title = sp
+			.getValue()
+			.toLowerCase(java.util.Locale.ROOT);
+		final String decoded = Unidecode.decode(title);
+
+		if (StringUtils.contains(decoded, TITLE_TEST)) {
+			return decoded
+				.replaceAll(TITLE_FILTER_REGEX, "")
+				.length() > TITLE_FILTER_RESIDUAL_LENGTH;
+		}
+		return !decoded
+			.replaceAll("\\W|\\d", "")
+			.isEmpty();
+	}
+
 	private static Author cleanupAuthor(Author author) {
 		if (StringUtils.isNotBlank(author.getFullname())) {
 			author
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java
index 761b9170f..275dea533 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ModelHardLimits.java
@@ -20,7 +20,7 @@ public class ModelHardLimits {
 	public static final int MAX_AUTHORS = 200;
 	public static final int MAX_RELATED_AUTHORS = 20;
 	public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
-	public static final int MAX_TITLE_LENGTH = 5000;
+	public static final int MAX_TITLE_LENGTH = 500;
 	public static final int MAX_TITLES = 10;
 	public static final int MAX_ABSTRACTS = 10;
 	public static final int MAX_ABSTRACT_LENGTH = 150000;