Fixed a bug in removing duplicates from the spatial list before sending the record to OpenSearch

This commit is contained in:
Alessia 2024-10-08 17:54:30 +02:00
parent a406a74721
commit 5d78b6e3b5
3 changed files with 48 additions and 68 deletions

View File

@ -1,5 +1,6 @@
package eu.dnetlib.ariadneplus.elasticsearch;
import eu.dnetlib.ariadneplus.elasticsearch.model.AriadneGeoPoint;
import eu.dnetlib.ariadneplus.elasticsearch.model.AriadnePlusEntry;
import eu.dnetlib.ariadneplus.elasticsearch.model.AriadneResource;
import eu.dnetlib.ariadneplus.elasticsearch.model.Spatial;
@ -185,7 +186,8 @@ public class BulkUpload {
ace.setSpatial(Arrays.asList(uniqueSpatial));
}
}
List<Spatial> dedupSpatials = removeDuplicates(ace.getSpatial());
List<Spatial> dedupSpatials = ace.getSpatial().stream().distinct().collect(Collectors.toList());
ace.getSpatial().clear();
ace.setSpatial(dedupSpatials);
}
@ -233,33 +235,34 @@ public class BulkUpload {
}
}
/**
 * Returns one representative per group of spatial entries that share the same
 * key, as computed by {@code getDuplicatesMap}.
 *
 * <p>The representative is the first element of each group; the order of the
 * returned list follows the iteration order of the map's values.
 *
 * @param spatialList the spatial entries to de-duplicate
 * @return a list holding the first entry of every group
 */
public static List<Spatial> removeDuplicates(List<Spatial> spatialList) {
    return getDuplicatesMap(spatialList)
            .values()
            .stream()
            .filter(Objects::nonNull)
            .map(group -> group.get(0))
            .collect(Collectors.toList());
}
// public static List<Spatial> removeDuplicates(List<Spatial> spatialList) {
// Map<String, List<Spatial>> duplicatesMap = getDuplicatesMap(spatialList);
// return duplicatesMap.values().stream()
// .filter(spatials -> spatials!=null)
// .map(spatials -> spatials.get(0))
// .collect(Collectors.toList());
// }
/**
 * Groups the given spatial entries by the key computed by {@code uniqueAttributes}.
 *
 * <p>The groups are collected into an insertion-ordered map, so iterating the
 * result visits the groups in the order in which their first member appears in
 * {@code spatialList}. The default {@code groupingBy} map is a {@code HashMap},
 * whose iteration order is unrelated to the input order.
 *
 * @param spatialList the spatial entries to group
 * @return a map from grouping key to the entries sharing that key, in encounter order
 */
private static Map<String, List<Spatial>> getDuplicatesMap(List<Spatial> spatialList) {
    return spatialList.stream()
            .collect(Collectors.groupingBy(
                    BulkUpload::uniqueAttributes,
                    // fully qualified so that no additional import is required
                    java.util.LinkedHashMap::new,
                    Collectors.toList()));
}
// private static Map<String, List<Spatial>> getDuplicatesMap(List<Spatial> spatialList) {
// return spatialList.stream().collect(Collectors.groupingBy(BulkUpload::uniqueAttributes));
// }
/**
 * Builds the key used to decide whether two spatial entries are duplicates:
 * place name, latitude and longitude, joined with a {@code '|'} separator.
 *
 * <p>The separator prevents distinct entries from collapsing onto the same key.
 * Without it, (lat 1.0, lon 12.0) and (lat 1.01, lon 2.0) both produce
 * {@code "1.012.0"}. {@code Double.toString} never emits {@code '|'}, so the last
 * two separators always delimit the coordinates even if the place name itself
 * contains one.
 *
 * <p>A {@code null} entry yields the same key as an entry with neither place name
 * nor geopoint, which keeps the two in the same group as before.
 *
 * @param spatial the spatial entry, may be {@code null}
 * @return the grouping key, never {@code null}
 */
private static String uniqueAttributes(Spatial spatial){
    String name = "";
    String lat = "";
    String lon = "";
    if (!Objects.isNull(spatial)) {
        if (!Objects.isNull(spatial.getPlaceName())) {
            name = spatial.getPlaceName();
        }
        if (!Objects.isNull(spatial.getGeopoint())) {
            lat = Double.toString(spatial.getGeopoint().getLat());
            lon = Double.toString(spatial.getGeopoint().getLon());
        }
    }
    return name + '|' + lat + '|' + lon;
}
// private static String uniqueAttributes(Spatial spatial){
// if(Objects.isNull(spatial)){
// return StringUtils.EMPTY;
// }
// String name = "";
// if (!Objects.isNull(spatial.getPlaceName())) {
// name = spatial.getPlaceName();
// }
// String lat = "";
// String lon = "";
// if (!Objects.isNull(spatial.getGeopoint())) {
// lat = Double.toString(spatial.getGeopoint().getLat());
// lon = Double.toString(spatial.getGeopoint().getLon());
// }
//
// String uniqueAttribute = (name) + (lat) + (lon);
// return uniqueAttribute;
// }
}

View File

@ -5,6 +5,7 @@ import org.apache.lucene.spatial3d.geom.GeoShape;
import org.elasticsearch.common.geo.GeoPoint;
import java.util.List;
import java.util.Objects;
public class Spatial {
@ -160,4 +161,17 @@ public class Spatial {
/**
 * Deserializes a {@code Spatial} from its JSON representation using a new
 * {@code Gson} instance.
 *
 * @param json the JSON text to parse
 * @return the object produced by Gson for {@code Spatial.class}
 */
public static Spatial fromJson(String json){
    final Gson parser = new Gson();
    return parser.fromJson(json, Spatial.class);
}
/**
 * Field-by-field equality: two {@code Spatial} objects are equal when every
 * getter compared below returns equal values on both sides.
 *
 * <p>NOTE(review): each comparison delegates to {@code Objects.equals}, so any
 * property whose type does not override {@code equals} (arrays included) is
 * compared by reference — confirm against the field types.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (!(o instanceof Spatial)) {
        return false;
    }
    final Spatial other = (Spatial) o;
    return Objects.equals(getPlaceName(), other.getPlaceName())
            && Objects.equals(getAddress(), other.getAddress())
            && Objects.equals(getGeopoint(), other.getGeopoint())
            && Objects.equals(getBoundingbox(), other.getBoundingbox())
            && Objects.equals(getPolygon(), other.getPolygon())
            && Objects.equals(getSpatialPrecision(), other.getSpatialPrecision())
            && Objects.equals(getCoordinatePrecision(), other.getCoordinatePrecision())
            && Objects.equals(getCentroid(), other.getCentroid())
            && Objects.equals(getBoundingBoxMaxLat(), other.getBoundingBoxMaxLat())
            && Objects.equals(getBoundingBoxMaxLon(), other.getBoundingBoxMaxLon())
            && Objects.equals(getBoundingBoxMinLat(), other.getBoundingBoxMinLat())
            && Objects.equals(getBoundingBoxMinLon(), other.getBoundingBoxMinLon())
            && Objects.equals(getLat(), other.getLat())
            && Objects.equals(getLon(), other.getLon())
            && Objects.equals(getPolygonGeoPoints(), other.getPolygonGeoPoints())
            && Objects.equals(getWkt(), other.getWkt());
}
/**
 * Hash code derived from the same sixteen properties, in the same order, that
 * {@code equals} compares.
 */
@Override
public int hashCode() {
    return Objects.hash(
            getPlaceName(),
            getAddress(),
            getGeopoint(),
            getBoundingbox(),
            getPolygon(),
            getSpatialPrecision(),
            getCoordinatePrecision(),
            getCentroid(),
            getBoundingBoxMaxLat(),
            getBoundingBoxMaxLon(),
            getBoundingBoxMinLat(),
            getBoundingBoxMinLon(),
            getLat(),
            getLon(),
            getPolygonGeoPoints(),
            getWkt());
}
}

View File

@ -78,9 +78,9 @@ public class GraphDbReaderAndESIndexTest {
/**
 * Runs {@code readAndIndexTest} for a single ADS resource.
 *
 * <p>{@code uri} and {@code apiId} were each declared twice in this method, which
 * cannot compile. The later declarations are kept as the active values; the
 * earlier ones are preserved as commented-out alternatives.
 */
public void loadToStaging() throws Exception {
    //String uri = "https://arche.acdh.oeaw.ac.at/api/255841";
    //String uri = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/6CBA2A00-01DE-315B-934C-6ED74C3BC6DC";
    String uri = "http://ariadne-infrastructure.eu/aocat/Resource/ADS/01B8B8E6-E835-3799-B7F2-4E9B3E2D60AB";
    String datasource = "ads";
    //String apiId = "archives";
    String apiId = "records_bangor11_4";
    readAndIndexTest(true, uri, datasource, apiId);
}
@ -187,7 +187,7 @@ public class GraphDbReaderAndESIndexTest {
@Test
public void uploadADSArchivesBoundingBoxTest() throws Exception {
boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/90D1C95D-E249-3E74-92D9-B58FDF690CC7";
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/90D1C95D-E249-3E74-92D9-B58FDF690CC7";
String datasource = "ads";
String collectionId = "archives";
readAndIndexTest(isRecord, recordId, datasource, collectionId);
@ -289,34 +289,6 @@ public class GraphDbReaderAndESIndexTest {
readAndIndexTest(isRecord, recordId, datasource, collectionId);
}
/**
 * Runs {@code readAndIndexTest} for one HNM collection URI, with the
 * {@code isRecord} flag set to {@code false}.
 */
@Test
public void uploadHNMCollectionSpatialTest() throws Exception {
    final String datasource = "hnm";
    final String collectionId = "hnmad";
    final String recordId =
            "https://ariadne-infrastructure.eu/aocat/Collection/HNM/5A7A4257-EE73-31F9-9F74-BADB371555F5";
    // first argument is the isRecord flag
    readAndIndexTest(false, recordId, datasource, collectionId);
}
/**
 * Runs {@code readAndIndexTest} for one HNM collection URI, with the
 * {@code isRecord} flag set to {@code false}.
 */
@Test
public void uploadHNMCollectionTemporalTest() throws Exception {
    final String datasource = "hnm";
    final String collectionId = "hnmad";
    final String recordId =
            "https://ariadne-infrastructure.eu/aocat/Collection/HNM/1AE50143-45C7-304F-8367-BCF3606CEF10";
    // first argument is the isRecord flag
    readAndIndexTest(false, recordId, datasource, collectionId);
}
/**
 * Runs {@code readAndIndexTest} for one DANS resource URI, with the
 * {@code isRecord} flag set to {@code true}.
 */
@Test
public void uploadDansSpatialTest() throws Exception {
    final String datasource = "dans";
    final String collectionId = "easy";
    final String recordId =
            "https://ariadne-infrastructure.eu/aocat/Resource/F100A0AD-6A7F-3976-B77F-FFAB4F5B55DD";
    // first argument is the isRecord flag
    readAndIndexTest(true, recordId, datasource, collectionId);
}
@Test
public void uploadDansNativePeriodTest() throws Exception {
@ -327,15 +299,6 @@ public class GraphDbReaderAndESIndexTest {
readAndIndexTest(isRecord, recordId, datasource, collectionId);
}
/**
 * Runs {@code readAndIndexTest} for one DANS resource URI, with the
 * {@code isRecord} flag set to {@code true}.
 */
@Test
public void uploadDansTemporalPolygonTest() throws Exception {
    final String datasource = "dans";
    final String collectionId = "easy";
    final String recordId =
            "https://ariadne-infrastructure.eu/aocat/Resource/D4E12349-E214-3F3F-BEE4-D39D9138916B";
    // first argument is the isRecord flag
    readAndIndexTest(true, recordId, datasource, collectionId);
}
@Test
public void uploadSndRockartTest() throws Exception {
boolean isRecord = true;