From e731a7658dcb4e88e8f584f5a8f25bf9e114d2e9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 27 Nov 2020 09:00:04 +0100 Subject: [PATCH] cleaning texts to remove tab characters too --- .../dhp/oa/graph/clean/CleaningFunctions.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java index 3d8ce6dcb..5155d0242 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java @@ -17,7 +17,7 @@ public class CleaningFunctions { public static final String DOI_URL_PREFIX_REGEX = "(^http(s?):\\/\\/)(((dx\\.)?doi\\.org)|(handle\\.test\\.datacite\\.org))\\/"; public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; - public static final String NEWLINES = "(?:\\n|\\r)"; + public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)"; public static final Set PID_BLACKLIST = new HashSet<>(); @@ -111,7 +111,7 @@ public class CleaningFunctions { .filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter(sp -> Objects.nonNull(sp.getQualifier())) .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) - .map(CleaningFunctions::removeNewLines) + .map(CleaningFunctions::cleanValue) .collect(Collectors.toList())); } if (Objects.nonNull(r.getTitle())) { @@ -122,7 +122,7 @@ public class CleaningFunctions { .stream() .filter(Objects::nonNull) .filter(sp -> StringUtils.isNotBlank(sp.getValue())) - .map(CleaningFunctions::removeNewLines) + .map(CleaningFunctions::cleanValue) .collect(Collectors.toList())); } if (Objects.nonNull(r.getDescription())) { @@ -133,7 +133,7 @@ public class CleaningFunctions { .stream() .filter(Objects::nonNull) .filter(sp -> StringUtils.isNotBlank(sp.getValue())) - .map(CleaningFunctions::removeNewLines) + .map(CleaningFunctions::cleanValue) .collect(Collectors.toList())); } if (Objects.nonNull(r.getPid())) { @@ -230,13 +230,13 @@ public class CleaningFunctions { return value; } - protected static StructuredProperty removeNewLines(StructuredProperty s) { - s.setValue(s.getValue().replaceAll(NEWLINES, " ")); + protected static StructuredProperty cleanValue(StructuredProperty s) { + s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); return s; } - protected static Field removeNewLines(Field s) { - s.setValue(s.getValue().replaceAll(NEWLINES, " ")); + protected static Field cleanValue(Field s) { + s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); return s; }