fixed bug in removing duplicates from the spatial list before sending the record to OpenSearch

This commit is contained in:
Alessia 2024-10-08 17:54:30 +02:00
parent a406a74721
commit 5d78b6e3b5
3 changed files with 48 additions and 68 deletions

View File

@ -1,5 +1,6 @@
package eu.dnetlib.ariadneplus.elasticsearch; package eu.dnetlib.ariadneplus.elasticsearch;
import eu.dnetlib.ariadneplus.elasticsearch.model.AriadneGeoPoint;
import eu.dnetlib.ariadneplus.elasticsearch.model.AriadnePlusEntry; import eu.dnetlib.ariadneplus.elasticsearch.model.AriadnePlusEntry;
import eu.dnetlib.ariadneplus.elasticsearch.model.AriadneResource; import eu.dnetlib.ariadneplus.elasticsearch.model.AriadneResource;
import eu.dnetlib.ariadneplus.elasticsearch.model.Spatial; import eu.dnetlib.ariadneplus.elasticsearch.model.Spatial;
@ -185,7 +186,8 @@ public class BulkUpload {
ace.setSpatial(Arrays.asList(uniqueSpatial)); ace.setSpatial(Arrays.asList(uniqueSpatial));
} }
} }
List<Spatial> dedupSpatials = removeDuplicates(ace.getSpatial());
List<Spatial> dedupSpatials = ace.getSpatial().stream().distinct().collect(Collectors.toList());
ace.getSpatial().clear(); ace.getSpatial().clear();
ace.setSpatial(dedupSpatials); ace.setSpatial(dedupSpatials);
} }
@ -233,33 +235,34 @@ public class BulkUpload {
} }
} }
public static List<Spatial> removeDuplicates(List<Spatial> spatialList) { // public static List<Spatial> removeDuplicates(List<Spatial> spatialList) {
Map<String, List<Spatial>> duplicatesMap = getDuplicatesMap(spatialList); // Map<String, List<Spatial>> duplicatesMap = getDuplicatesMap(spatialList);
return duplicatesMap.values().stream() // return duplicatesMap.values().stream()
.filter(spatials -> spatials!=null) // .filter(spatials -> spatials!=null)
.map(spatials -> spatials.get(0)) // .map(spatials -> spatials.get(0))
.collect(Collectors.toList()); // .collect(Collectors.toList());
} // }
private static Map<String, List<Spatial>> getDuplicatesMap(List<Spatial> spatialList) { // private static Map<String, List<Spatial>> getDuplicatesMap(List<Spatial> spatialList) {
return spatialList.stream().collect(Collectors.groupingBy(BulkUpload::uniqueAttributes)); // return spatialList.stream().collect(Collectors.groupingBy(BulkUpload::uniqueAttributes));
} // }
private static String uniqueAttributes(Spatial spatial){ // private static String uniqueAttributes(Spatial spatial){
if(Objects.isNull(spatial)){ // if(Objects.isNull(spatial)){
return StringUtils.EMPTY; // return StringUtils.EMPTY;
} // }
String name = ""; // String name = "";
if (!Objects.isNull(spatial.getPlaceName())) { // if (!Objects.isNull(spatial.getPlaceName())) {
name = spatial.getPlaceName(); // name = spatial.getPlaceName();
} // }
String lat = ""; // String lat = "";
String lon = ""; // String lon = "";
if (!Objects.isNull(spatial.getGeopoint())) { // if (!Objects.isNull(spatial.getGeopoint())) {
lat = Double.toString(spatial.getGeopoint().getLat()); // lat = Double.toString(spatial.getGeopoint().getLat());
lon = Double.toString(spatial.getGeopoint().getLon()); // lon = Double.toString(spatial.getGeopoint().getLon());
} // }
String uniqueAttribute = (name) + (lat) + (lon); //
return uniqueAttribute; // String uniqueAttribute = (name) + (lat) + (lon);
} // return uniqueAttribute;
// }
} }

View File

@ -5,6 +5,7 @@ import org.apache.lucene.spatial3d.geom.GeoShape;
import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.geo.GeoPoint;
import java.util.List; import java.util.List;
import java.util.Objects;
public class Spatial { public class Spatial {
@ -160,4 +161,17 @@ public class Spatial {
public static Spatial fromJson(String json){ public static Spatial fromJson(String json){
return new Gson().fromJson(json, Spatial.class); return new Gson().fromJson(json, Spatial.class);
} }
/**
 * Compares this spatial with another object for value equality.
 *
 * <p>Two {@code Spatial} instances are equal when all sixteen compared
 * properties (place name, address, geopoint, bounding box, polygon, spatial
 * and coordinate precision, centroid, the four bounding-box limits, lat, lon,
 * polygon geo points and WKT) are pairwise equal according to
 * {@link Objects#equals(Object, Object)}. Values are read through the getters.
 *
 * @param o the object to compare with; may be {@code null}
 * @return {@code true} if {@code o} is a {@code Spatial} whose compared
 *         properties all match, {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (!(o instanceof Spatial)) {
        return false;
    }
    final Spatial other = (Spatial) o;
    // Same comparison order as hashCode(); evaluation stops at the first mismatch.
    return Objects.equals(getPlaceName(), other.getPlaceName())
            && Objects.equals(getAddress(), other.getAddress())
            && Objects.equals(getGeopoint(), other.getGeopoint())
            && Objects.equals(getBoundingbox(), other.getBoundingbox())
            && Objects.equals(getPolygon(), other.getPolygon())
            && Objects.equals(getSpatialPrecision(), other.getSpatialPrecision())
            && Objects.equals(getCoordinatePrecision(), other.getCoordinatePrecision())
            && Objects.equals(getCentroid(), other.getCentroid())
            && Objects.equals(getBoundingBoxMaxLat(), other.getBoundingBoxMaxLat())
            && Objects.equals(getBoundingBoxMaxLon(), other.getBoundingBoxMaxLon())
            && Objects.equals(getBoundingBoxMinLat(), other.getBoundingBoxMinLat())
            && Objects.equals(getBoundingBoxMinLon(), other.getBoundingBoxMinLon())
            && Objects.equals(getLat(), other.getLat())
            && Objects.equals(getLon(), other.getLon())
            && Objects.equals(getPolygonGeoPoints(), other.getPolygonGeoPoints())
            && Objects.equals(getWkt(), other.getWkt());
}
/**
 * Computes a hash code from the same sixteen properties, in the same order,
 * that {@code equals} compares, so equal instances produce equal hash codes.
 *
 * @return the combined hash of the compared properties
 */
@Override
public int hashCode() {
    final Object[] components = {
            getPlaceName(),
            getAddress(),
            getGeopoint(),
            getBoundingbox(),
            getPolygon(),
            getSpatialPrecision(),
            getCoordinatePrecision(),
            getCentroid(),
            getBoundingBoxMaxLat(),
            getBoundingBoxMaxLon(),
            getBoundingBoxMinLat(),
            getBoundingBoxMinLon(),
            getLat(),
            getLon(),
            getPolygonGeoPoints(),
            getWkt()
    };
    // The array is passed as the varargs array itself, so the result matches
    // listing the values directly in the call.
    return Objects.hash(components);
}
} }

View File

@ -78,9 +78,9 @@ public class GraphDbReaderAndESIndexTest {
public void loadToStaging() throws Exception { public void loadToStaging() throws Exception {
//String uri = "https://arche.acdh.oeaw.ac.at/api/255841"; //String uri = "https://arche.acdh.oeaw.ac.at/api/255841";
String uri = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/6CBA2A00-01DE-315B-934C-6ED74C3BC6DC"; String uri = "http://ariadne-infrastructure.eu/aocat/Resource/ADS/01B8B8E6-E835-3799-B7F2-4E9B3E2D60AB";
String datasource = "ads"; String datasource = "ads";
String apiId = "archives"; String apiId = "records_bangor11_4";
readAndIndexTest(true, uri, datasource, apiId); readAndIndexTest(true, uri, datasource, apiId);
} }
@ -187,7 +187,7 @@ public class GraphDbReaderAndESIndexTest {
@Test @Test
public void uploadADSArchivesBoundingBoxTest() throws Exception { public void uploadADSArchivesBoundingBoxTest() throws Exception {
boolean isRecord = true; boolean isRecord = true;
String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/90D1C95D-E249-3E74-92D9-B58FDF690CC7"; String recordId = "https://ariadne-infrastructure.eu/aocat/Resource/ADS/90D1C95D-E249-3E74-92D9-B58FDF690CC7";
String datasource = "ads"; String datasource = "ads";
String collectionId = "archives"; String collectionId = "archives";
readAndIndexTest(isRecord, recordId, datasource, collectionId); readAndIndexTest(isRecord, recordId, datasource, collectionId);
@ -289,34 +289,6 @@ public class GraphDbReaderAndESIndexTest {
readAndIndexTest(isRecord, recordId, datasource, collectionId); readAndIndexTest(isRecord, recordId, datasource, collectionId);
} }
/**
 * Calls {@code readAndIndexTest} for one HNM collection URI, using datasource
 * {@code "hnm"} and collection id {@code "hnmad"}.
 *
 * @throws Exception propagated from {@code readAndIndexTest}
 */
@Test
public void uploadHNMCollectionSpatialTest() throws Exception {
    final String collectionUri = "https://ariadne-infrastructure.eu/aocat/Collection/HNM/5A7A4257-EE73-31F9-9F74-BADB371555F5";
    // First argument is the isRecord flag: false, the URI is an aocat Collection.
    readAndIndexTest(false, collectionUri, "hnm", "hnmad");
}
/**
 * Calls {@code readAndIndexTest} for a second HNM collection URI, using
 * datasource {@code "hnm"} and collection id {@code "hnmad"}.
 *
 * @throws Exception propagated from {@code readAndIndexTest}
 */
@Test
public void uploadHNMCollectionTemporalTest() throws Exception {
    final String collectionUri = "https://ariadne-infrastructure.eu/aocat/Collection/HNM/1AE50143-45C7-304F-8367-BCF3606CEF10";
    // First argument is the isRecord flag: false, the URI is an aocat Collection.
    readAndIndexTest(false, collectionUri, "hnm", "hnmad");
}
/**
 * Calls {@code readAndIndexTest} for one DANS resource URI, using datasource
 * {@code "dans"} and collection id {@code "easy"}.
 *
 * @throws Exception propagated from {@code readAndIndexTest}
 */
@Test
public void uploadDansSpatialTest() throws Exception {
    final String resourceUri = "https://ariadne-infrastructure.eu/aocat/Resource/F100A0AD-6A7F-3976-B77F-FFAB4F5B55DD";
    // First argument is the isRecord flag: true, the URI is an aocat Resource.
    readAndIndexTest(true, resourceUri, "dans", "easy");
}
@Test @Test
public void uploadDansNativePeriodTest() throws Exception { public void uploadDansNativePeriodTest() throws Exception {
@ -327,15 +299,6 @@ public class GraphDbReaderAndESIndexTest {
readAndIndexTest(isRecord, recordId, datasource, collectionId); readAndIndexTest(isRecord, recordId, datasource, collectionId);
} }
/**
 * Calls {@code readAndIndexTest} for a second DANS resource URI, using
 * datasource {@code "dans"} and collection id {@code "easy"}.
 *
 * @throws Exception propagated from {@code readAndIndexTest}
 */
@Test
public void uploadDansTemporalPolygonTest() throws Exception {
    final String resourceUri = "https://ariadne-infrastructure.eu/aocat/Resource/D4E12349-E214-3F3F-BEE4-D39D9138916B";
    // First argument is the isRecord flag: true, the URI is an aocat Resource.
    readAndIndexTest(true, resourceUri, "dans", "easy");
}
@Test @Test
public void uploadSndRockartTest() throws Exception { public void uploadSndRockartTest() throws Exception {
boolean isRecord = true; boolean isRecord = true;