Ckan Resource duplicates removal

This commit is contained in:
Fabio Sinibaldi 2020-02-06 17:58:11 +01:00
parent 7becd2f8d9
commit e559d3d99a
2 changed files with 34 additions and 2 deletions

View File

@ -3,6 +3,7 @@ package org.gcube.data.publishing.ckan2zenodo;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
@ -209,7 +210,7 @@ public class Translator {
try {
ObjectMapper mapper=Parsing.getMapper();
DocumentContext sourceCtx=JsonPath.using(Parsing.JSON_PATH_ALWAYS_LIST_CONFIG).parse(source.getContent());
ArrayList<CkanResource> toReturn=new ArrayList<>();
HashSet<CkanResource> toReturn=new HashSet();
for(Filter f:resourceFilter.getFilters()) {
JSONArray filtered=sourceCtx.read(f.getConditions().get(0));
for(Object obj:filtered) {
@ -219,7 +220,7 @@ public class Translator {
}
}
return toReturn;
return new ArrayList<CkanResource>(toReturn);
}catch(Throwable t) {
log.error("Unable to filter resources. ",t);
throw new TransformationException("Unable to filter "+source.getName()+" resources",t);

View File

@ -29,4 +29,35 @@ public class CkanResource{
private String id;
private String resource_type;
private String size;
/**
 * Computes a hash code from the {@code name} and {@code url} fields only,
 * matching the fields compared by {@link #equals(Object)}.
 * A {@code null} field contributes 0 to the result.
 *
 * @return the combined hash of {@code name} and {@code url}
 */
@Override
public int hashCode() {
    int nameHash = (name != null) ? name.hashCode() : 0;
    int urlHash = (url != null) ? url.hashCode() : 0;
    // Same 31-based polynomial as the accumulator form, seeded with 1:
    // ((31 * 1) + nameHash) * 31 + urlHash
    return 31 * (31 + nameHash) + urlHash;
}
/**
 * Compares this resource with another object.
 * Two instances are equal when they have the same runtime class and their
 * {@code name} and {@code url} fields are both equal (two {@code null}
 * values count as equal). No other field takes part in the comparison.
 *
 * @param obj the object to compare against, may be {@code null}
 * @return {@code true} if {@code obj} is the same kind of resource with
 *         the same {@code name} and {@code url}; {@code false} otherwise
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    CkanResource that = (CkanResource) obj;
    // name is checked first; url is only examined when the names match
    boolean nameDiffers = (name == null) ? that.name != null : !name.equals(that.name);
    if (nameDiffers) {
        return false;
    }
    return (url == null) ? that.url == null : url.equals(that.url);
}
}