Merge pull request 'removed duplicated fields' (#25) from unique_field_in_lists into master

Looks good as a temporary workaround. I agree the model could seamlessly make the distinct operation by using HashSets instead of Linked (or Array) Lists.

The task to update the model in such a way is added on #9#issuecomment-1583

Thanks!
This commit is contained in:
Claudio Atzori 2020-07-20 12:12:50 +02:00
commit 050dda223d
3 changed files with 16 additions and 0 deletions

View File

@ -4,7 +4,11 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
@ -57,6 +61,7 @@ public class OafMapperUtils {
.stream(values)
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue()))
.collect(Collectors.toList());
}
@ -65,6 +70,7 @@ public class OafMapperUtils {
.stream()
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue()))
.collect(Collectors.toList());
}
@ -237,4 +243,10 @@ public class OafMapperUtils {
public static String asString(final Object o) {
return o == null ? "" : o.toString();
}
public static <T> Predicate<T> distinctByKey(
final Function<? super T, ?> keyExtractor) {
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
}
}

View File

@ -75,6 +75,7 @@ public class MappersTest {
assertValidId(p.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertFalse(p.getDataInfo().getInvisible());
assertTrue(p.getSource().size() == 1);
assertTrue(p.getAuthor().size() > 0);
final Optional<Author> author = p
@ -83,6 +84,7 @@ public class MappersTest {
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent());
final StructuredProperty pid = author
.get()
.getPid()

View File

@ -34,6 +34,8 @@
<dc:relation>info:eu-repo/semantics/altIdentifier/eissn/2367-8194</dc:relation>
<dc:relation>info:eu-repo/grantAgreement/EC/FP7/226852</dc:relation>
<dc:source>One Ecosystem 2: e13718</dc:source>
<dc:source>One Ecosystem 2: e13718</dc:source>
<dc:source>One Ecosystem 2: e13718</dc:source>
<dc:subject>Ecosystem Services hotspots</dc:subject>
<dc:subject>Natura 2000</dc:subject>
<dc:subject>Quiet Protected Areas</dc:subject>