removed spatial with empty data; removed duplicates on spatial list

This commit is contained in:
Enrico Ottonello 2021-03-04 10:59:54 +01:00
parent 61f0d6877d
commit f2425ffa2d
2 changed files with 76 additions and 18 deletions

View File

@ -2,6 +2,7 @@ package eu.dnetlib.ariadneplus.elasticsearch;
import eu.dnetlib.ariadneplus.elasticsearch.model.*;
import eu.dnetlib.ariadneplus.reader.ResourceManager;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpHost;
@ -18,10 +19,7 @@ import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;
@Service
@ -85,16 +83,23 @@ public class BulkUpload {
ace.setSpatial(Arrays.asList(uniqueSpatial));
}
}
else {
if (ace.getSpatial()!=null) {
Set<String> items = new HashSet<>();
List<Spatial> spatialsNoDup = ace.getSpatial().stream()
.filter(s -> !items.add(s.getPlaceName()))
.collect(Collectors.toList());
ace.getSpatial().clear();
ace.setSpatial(spatialsNoDup);
}
// else {
// if (ace.getSpatial()!=null) {
// Set<String> items = new HashSet<>();
// List<Spatial> spatialsNoDup = ace.getSpatial().stream()
// .filter(s -> !items.add(s.getPlaceName()))
// .collect(Collectors.toList());
// ace.getSpatial().clear();
// ace.setSpatial(spatialsNoDup);
// }
//
// }
List<Spatial> dedupSpatials = removeDuplicates(ace.getSpatial());
ace.getSpatial().clear();
ace.setSpatial(dedupSpatials);
if (ace.getSpatial().size()>1) {
ace.getSpatial().removeIf(s -> (s.getPlaceName()!=null&&s.getPlaceName().equals("Name not provided")&&Objects.isNull(s.getLocation())));
}
}
else {
@ -180,4 +185,34 @@ public class BulkUpload {
}
return esResponseCode;
}
/**
 * Returns one representative per group of equivalent spatial entries.
 *
 * <p>Entries are grouped by {@code getDuplicatesMap}; the first element of each
 * group is kept. The input list is never modified. The result is always a new,
 * mutable {@link ArrayList}, so a caller may clear the source list or call
 * {@code removeIf} on the returned list without risking an
 * {@link UnsupportedOperationException}.
 *
 * @param spatialList the spatial entries to de-duplicate; may be {@code null}
 * @return a new mutable list holding one entry per group; empty when
 *         {@code spatialList} is {@code null} or empty
 */
public static List<Spatial> removeDuplicates(List<Spatial> spatialList) {
    // A record without any spatial data must not fail with a NullPointerException.
    if (spatialList == null || spatialList.isEmpty()) {
        return new ArrayList<>();
    }
    return getDuplicatesMap(spatialList).values().stream()
            // Defensive: skip groups that could not provide a first element.
            .filter(group -> group != null && !group.isEmpty())
            .map(group -> group.get(0))
            // Collectors.toList() gives no mutability guarantee; be explicit.
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Groups spatial entries by the key computed by {@code uniqueAttributes}.
 *
 * <p>The returned map is a {@link LinkedHashMap}, so groups iterate in the order
 * in which their key first appears in {@code spatialList}; within a group the
 * entries keep their input order. This keeps the de-duplicated output
 * deterministic instead of depending on hash ordering.
 *
 * @param spatialList the entries to group; may be {@code null}
 * @return a map from grouping key to the entries sharing that key; empty when
 *         {@code spatialList} is {@code null}
 */
private static Map<String, List<Spatial>> getDuplicatesMap(List<Spatial> spatialList) {
    if (spatialList == null) {
        return new LinkedHashMap<>();
    }
    return spatialList.stream().collect(
            Collectors.groupingBy(BulkUpload::uniqueAttributes, LinkedHashMap::new, Collectors.toList()));
}
/**
 * Builds the key used to decide whether two spatial entries are duplicates.
 *
 * <p>The key is {@code placeName|lat|lon}; a missing place name or a missing
 * location contributes empty fields. The separator is required: without it
 * distinct entries can collapse to the same key (e.g. lat 1.0 / lon 23.0 and
 * lat 1.02 / lon 3.0 both concatenate to "1.023.0"), and one of them would be
 * dropped as a false duplicate. {@code Float.toString} never emits '|', so the
 * last two separators always delimit lat and lon even if the name contains '|'.
 *
 * @param spatial the entry to compute the key for; may be {@code null}
 * @return the grouping key, or the empty string when {@code spatial} is {@code null}
 */
private static String uniqueAttributes(Spatial spatial){
    if (Objects.isNull(spatial)) {
        return StringUtils.EMPTY;
    }
    String name = "";
    if (!Objects.isNull(spatial.getPlaceName())) {
        name = spatial.getPlaceName();
    }
    String lat = "";
    String lon = "";
    if (!Objects.isNull(spatial.getLocation())) {
        lat = Float.toString(spatial.getLocation().getLat());
        lon = Float.toString(spatial.getLocation().getLon());
    }
    // Delimit the fields so that adjacent values cannot run into each other.
    return name + "|" + lat + "|" + lon;
}
}

View File

@ -32,7 +32,6 @@ public class GraphDbReaderAndESIndexTest {
private RunSPARQLQueryService runSPQRLQuery;
@Test
@Ignore
public void uploadAMCRFieldworkTest() throws Exception {
boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/AMCR/E61E0F4E-268F-39E4-8EDB-A431AFC505AA";
@ -42,7 +41,6 @@ public class GraphDbReaderAndESIndexTest {
}
@Test
@Ignore
public void uploadAMCRDocumentTest() throws Exception {
boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Collection/AMCR/FC59581D-DC3A-31DA-922A-98DE764F3D76";
@ -52,7 +50,6 @@ public class GraphDbReaderAndESIndexTest {
}
@Test
@Ignore
public void uploadAMCRSiteTest() throws Exception {
boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/AMCR/3C7EC936-A7CA-3720-B3DC-413A25754FD4";
@ -62,7 +59,6 @@ public class GraphDbReaderAndESIndexTest {
}
@Test
@Ignore
public void uploadADSRecordTest() throws Exception {
boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/34E3811A-0BAD-3832-B3A0-3139E8A0285C";
@ -72,7 +68,6 @@ public class GraphDbReaderAndESIndexTest {
}
@Test
@Ignore
public void uploadADSRecordWithNativeFromUntilTest() throws Exception {
boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/3C3C7A86-FF09-3431-95B1-B9A4AA8293AF";
@ -82,6 +77,7 @@ public class GraphDbReaderAndESIndexTest {
}
@Test
@Ignore
public void uploadADSRecordWithoutNativeFromUntilTest() throws Exception {
boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/DF5F27D3-C877-3F23-9EAA-3776362363AA";
@ -90,6 +86,33 @@ public class GraphDbReaderAndESIndexTest {
readAndIndexTest(isRecord, recordId, datasource, collectionId);
}
/**
 * Calls {@code readAndIndexTest} with {@code isRecord = true} for one resource of
 * the "zrc_zbiva" datasource, collection "sites".
 * NOTE(review): judging by the test name this resource is meant to exercise
 * spatial handling — confirm against the source data.
 */
@Test
public void uploadZbivaRecordSpatialTest() throws Exception {
    final String zbivaResourceUri =
            "https://ariadne-infrastructure.eu/aocat/Resource/ZRC-SAZU-Zbiva/B34517C6-8D94-3A02-B461-08522F958479";
    readAndIndexTest(true, zbivaResourceUri, "zrc_zbiva", "sites");
}
/**
 * Calls {@code readAndIndexTest} with {@code isRecord = false} for one resource of
 * the "hnm" datasource, collection "hnmad".
 * NOTE(review): judging by the test name this is a collection-level resource used
 * to exercise spatial handling — confirm against the source data.
 */
@Test
public void uploadHNMCollectionSpatialTest() throws Exception {
    final String hnmResourceUri =
            "https://ariadne-portal-staging.d4science.org/resource/981B4251-FA9C-35E9-8654-57278808145D";
    readAndIndexTest(false, hnmResourceUri, "hnm", "hnmad");
}
/**
 * Calls {@code readAndIndexTest} with {@code isRecord = true} for one resource of
 * the "dans" datasource, collection "easy".
 * NOTE(review): judging by the test name this resource carries no spatial
 * data — confirm against the source data.
 */
@Test
public void uploadDansNoSpatialTest() throws Exception {
    final String dansResourceUri =
            "https://ariadne-portal-staging.d4science.org/resource/F100A0AD-6A7F-3976-B77F-FFAB4F5B55DD";
    readAndIndexTest(true, dansResourceUri, "dans", "easy");
}
private void readAndIndexTest(boolean isRecord, String recordId, String datasource, String collectionId) throws Exception {
final ClassPathResource resource = new ClassPathResource("application.properties");
Properties appProps = new Properties();