From 3bc07c58817d2153d719ae3bb71f0f0f8b8cda37 Mon Sep 17 00:00:00 2001 From: miconis Date: Wed, 9 Mar 2022 12:53:09 +0100 Subject: [PATCH] bug fix in AuthorsMatch, implementation of the concat function in the model creation with jpath query --- .../src/main/java/eu/dnetlib/pace/config/Type.java | 2 +- .../main/java/eu/dnetlib/pace/tree/AuthorsMatch.java | 6 ++++-- .../java/eu/dnetlib/pace/util/MapDocumentUtil.java | 12 ++++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Type.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Type.java index 0f1f696ab..33ae4015f 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Type.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Type.java @@ -1,5 +1,5 @@ package eu.dnetlib.pace.config; public enum Type { - String, Int, List, JSON, URL + String, Int, List, JSON, URL, StringConcat } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java index 3f9fdd37a..6307cd576 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java @@ -77,11 +77,13 @@ public class AuthorsMatch extends AbstractComparator { //one person is inaccurate if (p1.isAccurate() ^ p2.isAccurate()) { //prepare data + //data for the accurate person String name = normalization(p1.isAccurate()? p1.getNormalisedFirstName() : p2.getNormalisedFirstName()); - String surname = normalization(p1.isAccurate()? p2.getNormalisedSurname() : p2.getNormalisedSurname()); + String surname = normalization(p1.isAccurate()? p1.getNormalisedSurname() : p2.getNormalisedSurname()); + //data for the inaccurate person String fullname = normalization( - p1.isAccurate() ? ((p1.getNormalisedFullname().isEmpty()) ? 
p1.getOriginal() : p1.getNormalisedFullname()) : (p2.getNormalisedFullname().isEmpty() ? p2.getOriginal() : p2.getNormalisedFullname()) + p1.isAccurate() ? ((p2.getNormalisedFullname().isEmpty()) ? p2.getOriginal() : p2.getNormalisedFullname()) : (p1.getNormalisedFullname().isEmpty() ? p1.getOriginal() : p1.getNormalisedFullname()) ); if (fullname.contains(surname)) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java index 2683c7b24..f9bd9399b 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java @@ -15,6 +15,7 @@ import net.minidev.json.JSONArray; import java.util.*; import java.util.function.Predicate; +import java.util.stream.Collectors; public class MapDocumentUtil { @@ -45,6 +46,17 @@ public class MapDocumentUtil { .forEach(fi::add); stringField.put(fdef.getName(), fi); break; + case StringConcat: + String[] jpaths = fdef.getPath().split("\\|\\|\\|"); + stringField.put( + fdef.getName(), + new FieldValueImpl(Type.String, + fdef.getName(), + truncateValue(Arrays.stream(jpaths).map(jpath -> getJPathString(jpath, json)).collect(Collectors.joining(" ")), + fdef.getLength()) + ) + ); + break; } }); m.setFieldMap(stringField);