forked from D-Net/dnet-hadoop
Merge pull request 'removed duplicated fields' (#25) from unique_field_in_lists into master
Looks good as a temporary workaround. I agree the model could seamlessly make the distinct operation by using HashSets instead of Linked (or Array) Lists. The task to update the model in such a way is added on #9#issuecomment-1583 Thanks!
This commit is contained in:
commit
050dda223d
|
@ -4,7 +4,11 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.function.Predicate;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -57,6 +61,7 @@ public class OafMapperUtils {
|
||||||
.stream(values)
|
.stream(values)
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
|
.filter(distinctByKey(f -> f.getValue()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,6 +70,7 @@ public class OafMapperUtils {
|
||||||
.stream()
|
.stream()
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
|
.filter(distinctByKey(f -> f.getValue()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,4 +243,10 @@ public class OafMapperUtils {
|
||||||
public static String asString(final Object o) {
|
public static String asString(final Object o) {
|
||||||
return o == null ? "" : o.toString();
|
return o == null ? "" : o.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static <T> Predicate<T> distinctByKey(
|
||||||
|
final Function<? super T, ?> keyExtractor) {
|
||||||
|
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
|
||||||
|
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,6 +75,7 @@ public class MappersTest {
|
||||||
assertValidId(p.getCollectedfrom().get(0).getKey());
|
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
assertFalse(p.getDataInfo().getInvisible());
|
assertFalse(p.getDataInfo().getInvisible());
|
||||||
|
assertTrue(p.getSource().size() == 1);
|
||||||
|
|
||||||
assertTrue(p.getAuthor().size() > 0);
|
assertTrue(p.getAuthor().size() > 0);
|
||||||
final Optional<Author> author = p
|
final Optional<Author> author = p
|
||||||
|
@ -83,6 +84,7 @@ public class MappersTest {
|
||||||
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.findFirst();
|
.findFirst();
|
||||||
assertTrue(author.isPresent());
|
assertTrue(author.isPresent());
|
||||||
|
|
||||||
final StructuredProperty pid = author
|
final StructuredProperty pid = author
|
||||||
.get()
|
.get()
|
||||||
.getPid()
|
.getPid()
|
||||||
|
|
|
@ -34,6 +34,8 @@
|
||||||
<dc:relation>info:eu-repo/semantics/altIdentifier/eissn/2367-8194</dc:relation>
|
<dc:relation>info:eu-repo/semantics/altIdentifier/eissn/2367-8194</dc:relation>
|
||||||
<dc:relation>info:eu-repo/grantAgreement/EC/FP7/226852</dc:relation>
|
<dc:relation>info:eu-repo/grantAgreement/EC/FP7/226852</dc:relation>
|
||||||
<dc:source>One Ecosystem 2: e13718</dc:source>
|
<dc:source>One Ecosystem 2: e13718</dc:source>
|
||||||
|
<dc:source>One Ecosystem 2: e13718</dc:source>
|
||||||
|
<dc:source>One Ecosystem 2: e13718</dc:source>
|
||||||
<dc:subject>Ecosystem Services hotspots</dc:subject>
|
<dc:subject>Ecosystem Services hotspots</dc:subject>
|
||||||
<dc:subject>Natura 2000</dc:subject>
|
<dc:subject>Natura 2000</dc:subject>
|
||||||
<dc:subject>Quiet Protected Areas</dc:subject>
|
<dc:subject>Quiet Protected Areas</dc:subject>
|
||||||
|
|
Loading…
Reference in New Issue