// Listing metadata: 240 lines, 9.0 KiB, Java
package org.gcube.data.spd.obisplugin.search;
|
|
|
|
|
|
import java.text.SimpleDateFormat;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Calendar;
|
|
import java.util.HashSet;
|
|
import java.util.LinkedList;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Set;
|
|
|
|
import org.gcube.data.spd.model.Condition;
|
|
import org.gcube.data.spd.model.exceptions.StreamBlockingException;
|
|
import org.gcube.data.spd.model.products.DataSet;
|
|
import org.gcube.data.spd.model.products.Product;
|
|
import org.gcube.data.spd.model.products.Product.ProductType;
|
|
import org.gcube.data.spd.model.products.ResultItem;
|
|
import org.gcube.data.spd.model.products.Taxon;
|
|
import org.gcube.data.spd.obisplugin.Constants;
|
|
import org.gcube.data.spd.obisplugin.search.query.MappingUtils;
|
|
import org.gcube.data.spd.obisplugin.search.query.PagedQueryIterator;
|
|
import org.gcube.data.spd.obisplugin.search.query.PagedQueryObject;
|
|
import org.gcube.data.spd.obisplugin.search.query.QueryByIdentifier;
|
|
import org.gcube.data.spd.obisplugin.search.query.QueryCondition;
|
|
import org.gcube.data.spd.obisplugin.search.query.QueryCount;
|
|
import org.gcube.data.spd.obisplugin.search.query.QueryType;
|
|
import org.gcube.data.spd.obisplugin.search.query.ResultType;
|
|
import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
public class ResultItemSearch {
|
|
|
|
private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
|
|
|
|
private static Logger log = LoggerFactory.getLogger(ResultItemSearch.class);
|
|
|
|
private List<QueryCondition> queryConditions = new ArrayList<QueryCondition>();
|
|
|
|
private String baseURL;
|
|
|
|
String searchQuery;
|
|
|
|
public ResultItemSearch(String baseURL, String searchQuery, Condition... conditions) {
|
|
this.baseURL = baseURL;
|
|
this.searchQuery = searchQuery.replaceAll(" ", "%20").trim();
|
|
this.searchQuery = this.searchQuery.substring(0, 1).toUpperCase()
|
|
+ this.searchQuery.substring(1, this.searchQuery.length()).toLowerCase();
|
|
try {
|
|
this.queryConditions = Utils.elaborateConditions(conditions);
|
|
} catch (Exception e) {
|
|
log.error("error elaborating conditions", e);
|
|
}
|
|
}
|
|
|
|
public void search(ObjectWriter<ResultItem> writer, int limit) {
|
|
PagedQueryObject queryObject = new PagedQueryObject(baseURL, ResultType.Occurrence, limit);
|
|
queryObject.setConditions(QueryCondition.cond("scientificname", searchQuery));
|
|
queryObject.getConditions().addAll(this.queryConditions);
|
|
try {
|
|
PagedQueryIterator<ResultItem> pagedIterator = new PagedQueryIterator<ResultItem>(queryObject) {
|
|
|
|
Set<String> alreadyVisited = new HashSet<String>();
|
|
|
|
@Override
|
|
protected ResultItem getObject(Map<String, Object> mappedObject) throws Exception {
|
|
ResultItem resultItem = buildResult(mappedObject);
|
|
log.debug("ResultItem: " + resultItem);
|
|
return resultItem;
|
|
}
|
|
|
|
@Override
|
|
protected boolean useIt(Map<String, Object> mappedObject) {
|
|
String datasetKey = (String) mappedObject.get("dataset_id");// resourceID
|
|
Integer taxonId = (Integer) mappedObject.get("aphiaID");// obisID
|
|
String key = datasetKey + "|" + taxonId;
|
|
if (alreadyVisited.contains(key))
|
|
return false;
|
|
alreadyVisited.add(key);
|
|
return true;
|
|
}
|
|
|
|
};
|
|
|
|
while (pagedIterator.hasNext() && writer.isAlive())
|
|
writer.write(pagedIterator.next());
|
|
|
|
} catch (Exception e) {
|
|
log.error("error writing resultItems", e);
|
|
writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME));
|
|
}
|
|
|
|
}
|
|
|
|
ResultItem buildResult(Map<String, Object> singleObject) throws Exception {
|
|
try {
|
|
long start = System.currentTimeMillis();
|
|
// log.debug("Retrieved Occurence: "+singleObject);
|
|
Integer taxonId = MappingUtils.getAsInteger(singleObject, "aphiaID");// obisID
|
|
String scientificName = MappingUtils.getAsString(singleObject, "scientificName");
|
|
ResultItem resItem = new ResultItem(taxonId.toString(), scientificName);
|
|
String scientificNameAuthorship = MappingUtils.getAsString(singleObject, "scientificNameAuthorship");
|
|
resItem.setScientificNameAuthorship(scientificNameAuthorship);
|
|
resItem.setLsid(MappingUtils.getAsString(singleObject,"scientificNameID"));
|
|
|
|
QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId.toString(), QueryType.Taxon);
|
|
Map<String, Object> taxonData = MappingUtils.getObjectMapping(query.build());
|
|
log.debug("Retrieved taxon: " + taxonData);
|
|
if (taxonData != null && !taxonData.isEmpty()) {
|
|
@SuppressWarnings("unchecked")
|
|
List<Map<String, Object>> taxonList = (List<Map<String, Object>>) taxonData.get("results");
|
|
if (taxonList != null && !taxonList.isEmpty()) {
|
|
Map<String, Object> taxonSingle = taxonList.get(0);
|
|
String taxonRank = MappingUtils.getAsString(taxonSingle, "taxonRank");
|
|
resItem.setRank(taxonRank);// rank_name
|
|
resItem.setParent(retrieveParentTaxon(taxonId, taxonRank, taxonSingle));// parent_id
|
|
}
|
|
}
|
|
|
|
|
|
|
|
DataSet dataset = DataSetRetreiver.get(baseURL, MappingUtils.getAsString(singleObject, "dataset_id"));
|
|
resItem.setProvider(dataset.getDataProvider().getName());
|
|
resItem.setDataSet(dataset);
|
|
|
|
List<Product> products = retrieveProducts(taxonId.toString(), dataset);
|
|
resItem.setProducts(products);
|
|
|
|
String credits = "Biodiversity occurrence accessed through OBIS WebService, https://api.obis.org/v3/, "
|
|
+ format.format(Calendar.getInstance().getTime()) + ")";
|
|
resItem.setCredits(credits);
|
|
log.trace("[Benchmark] time to retrieve ResultItem is " + (System.currentTimeMillis() - start));
|
|
log.debug("found species: id={}, name={}", resItem.getId(), resItem.getScientificName());
|
|
return resItem;
|
|
} catch (Exception e) {
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
private Taxon retrieveParentTaxon(Integer taxonId, String taxonRank, Map<String, Object> taxonMap)
|
|
throws Exception {
|
|
log.debug("Call retrieve parentTaxon: [taxonId={}, taxonRank={}]",taxonId,taxonRank);
|
|
if (taxonId == 0)
|
|
return null;
|
|
long start = System.currentTimeMillis();
|
|
|
|
LinkedList<Taxon> taxons = new LinkedList<>();
|
|
|
|
String searchTaxon = taxonRank;
|
|
Map<String, Object> searchMap = taxonMap;
|
|
Taxon previousTaxon = null;
|
|
Taxon taxonToReturn = null;
|
|
TaxonCategories currentTaxon=null;
|
|
boolean end = false;
|
|
|
|
while (!end) {
|
|
|
|
currentTaxon = TaxonCategories.getTaxonCategory(searchTaxon);
|
|
if (currentTaxon == null) {
|
|
break;
|
|
}
|
|
boolean foundParentId = false;
|
|
Integer parentId=null;
|
|
while (!foundParentId&&!end) {
|
|
TaxonCategories parentTaxon = TaxonCategories.getParent(currentTaxon);
|
|
if (parentTaxon == null) {
|
|
end=true;
|
|
break;
|
|
}
|
|
String parentIdKey = parentTaxon.name().toLowerCase() + "id";
|
|
if (searchMap.containsKey(parentIdKey)) {
|
|
parentId = MappingUtils.getAsInteger(searchMap, parentIdKey);
|
|
currentTaxon=parentTaxon;
|
|
foundParentId=true;
|
|
}else {
|
|
currentTaxon=parentTaxon;
|
|
}
|
|
}
|
|
|
|
if(end||!foundParentId){
|
|
break;
|
|
}
|
|
log.debug("Found parentId: "+parentId);
|
|
|
|
QueryByIdentifier query = new QueryByIdentifier(baseURL, parentId.toString(), QueryType.Taxon);
|
|
Map<String, Object> parentTaxonData = MappingUtils.getObjectMapping(query.build());
|
|
log.debug("ParentTaxon: " + parentTaxonData);
|
|
if (parentTaxonData != null && !parentTaxonData.isEmpty()) {
|
|
@SuppressWarnings("unchecked")
|
|
List<Map<String, Object>> parentTaxonList = (List<Map<String, Object>>) parentTaxonData.get("results");
|
|
if (parentTaxonList != null && !parentTaxonList.isEmpty()) {
|
|
searchMap = parentTaxonList.get(0);
|
|
|
|
Taxon taxon = new Taxon(MappingUtils.getAsInteger(searchMap, "taxonID").toString(),
|
|
MappingUtils.getAsString(searchMap, "scientificName"));
|
|
taxon.setScientificNameAuthorship(MappingUtils.getAsString(searchMap, "scientificNameAuthorship"));
|
|
searchTaxon=MappingUtils.getAsString(searchMap, "taxonRank");
|
|
taxon.setRank(searchTaxon);// rank_name
|
|
if (previousTaxon == null) {
|
|
previousTaxon = taxon;
|
|
} else {
|
|
previousTaxon.setParent(taxon);
|
|
taxons.add(previousTaxon);
|
|
previousTaxon = taxon;
|
|
}
|
|
|
|
} else {
|
|
end = true;
|
|
}
|
|
} else {
|
|
end = true;
|
|
}
|
|
}
|
|
|
|
if (taxons.isEmpty()) {
|
|
taxonToReturn = previousTaxon;
|
|
} else {
|
|
taxonToReturn = taxons.getFirst();
|
|
}
|
|
log.trace("[Benchmark] time to retrieve taxon is " + (System.currentTimeMillis() - start));
|
|
return taxonToReturn;
|
|
}
|
|
|
|
private List<Product> retrieveProducts(String taxonId, DataSet dataset) throws Exception {
|
|
long start = System.currentTimeMillis();
|
|
QueryCount occurrencesQuery = new QueryCount(baseURL, ResultType.Occurrence);
|
|
occurrencesQuery.setConditions(QueryCondition.cond("taxonid", taxonId),
|
|
QueryCondition.cond("datasetid", dataset.getId()));
|
|
occurrencesQuery.getConditions().addAll(this.queryConditions);
|
|
String productId = Utils.createProductsKey(Utils.getDataSetAsString(dataset), taxonId, this.queryConditions);
|
|
Product product = new Product(ProductType.Occurrence, productId);
|
|
product.setCount(occurrencesQuery.getCount());
|
|
log.trace("[Benchmark] time to retrieve product is " + (System.currentTimeMillis() - start));
|
|
return Arrays.asList(product);
|
|
}
|
|
|
|
}
|