/*
 * Source: obis-spd-plugin/src/main/java/org/gcube/data/spd/obisplugin/search/ResultItemSearch.java
 * (repository viewer metadata: 240 lines, 9.0 KiB, Java)
 */

package org.gcube.data.spd.obisplugin.search;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.gcube.data.spd.model.Condition;
import org.gcube.data.spd.model.exceptions.StreamBlockingException;
import org.gcube.data.spd.model.products.DataSet;
import org.gcube.data.spd.model.products.Product;
import org.gcube.data.spd.model.products.Product.ProductType;
import org.gcube.data.spd.model.products.ResultItem;
import org.gcube.data.spd.model.products.Taxon;
import org.gcube.data.spd.obisplugin.Constants;
import org.gcube.data.spd.obisplugin.search.query.MappingUtils;
import org.gcube.data.spd.obisplugin.search.query.PagedQueryIterator;
import org.gcube.data.spd.obisplugin.search.query.PagedQueryObject;
import org.gcube.data.spd.obisplugin.search.query.QueryByIdentifier;
import org.gcube.data.spd.obisplugin.search.query.QueryCondition;
import org.gcube.data.spd.obisplugin.search.query.QueryCount;
import org.gcube.data.spd.obisplugin.search.query.QueryType;
import org.gcube.data.spd.obisplugin.search.query.ResultType;
import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Searches occurrence records by scientific name and writes one {@link ResultItem}
 * for every distinct (dataset, taxon) pair found to an {@link ObjectWriter}.
 *
 * <p>Each result item is enriched with the taxon rank, the chain of parent taxa,
 * the owning dataset and an occurrence-count product, all fetched from the service
 * rooted at the base URL given to the constructor.
 */
public class ResultItemSearch {

    /** Pattern of the access date appended to the credits string. */
    private static final String CREDITS_DATE_PATTERN = "yyyy-MM-dd";

    private static final Logger log = LoggerFactory.getLogger(ResultItemSearch.class);

    /** Extra filters elaborated from the caller's conditions; stays empty when elaboration fails. */
    private List<QueryCondition> queryConditions = new ArrayList<QueryCondition>();

    /** Base URL handed to every query object built by this class. */
    private final String baseURL;

    /** Normalized scientific name sent as the {@code scientificname} condition. */
    String searchQuery;

    /**
     * Creates a search for the given scientific name.
     *
     * @param baseURL     base URL handed to the query objects
     * @param searchQuery scientific name to look for; must not be {@code null}. Surrounding
     *                    whitespace is dropped, inner spaces become {@code %20}, the first
     *                    character is upper-cased and the rest lower-cased
     * @param conditions  optional extra filters; if they cannot be elaborated the error is
     *                    logged and the search runs without extra filters
     */
    public ResultItemSearch(String baseURL, String searchQuery, Condition... conditions) {
        this.baseURL = baseURL;
        this.searchQuery = normalizeSearchQuery(searchQuery);
        try {
            this.queryConditions = Utils.elaborateConditions(conditions);
        } catch (Exception e) {
            // Deliberate best effort: keep the empty default and search unfiltered.
            log.error("error elaborating conditions", e);
        }
    }

    /**
     * Trims the raw query, percent-encodes its spaces and capitalizes it
     * ("first letter upper case, everything else lower case").
     *
     * <p>Trimming happens before the encoding so that leading/trailing blanks are really
     * removed, and the case conversion uses {@link Locale#ROOT} so the result does not
     * depend on the JVM default locale. A blank query yields an empty string.
     */
    private static String normalizeSearchQuery(String rawQuery) {
        String encoded = rawQuery.trim().replace(" ", "%20");
        if (encoded.isEmpty()) {
            return encoded;
        }
        return encoded.substring(0, 1).toUpperCase(Locale.ROOT)
                + encoded.substring(1).toLowerCase(Locale.ROOT);
    }

    /**
     * Runs the search and writes the result items to {@code writer} until the results are
     * exhausted or the writer is no longer alive.
     *
     * <p>Only the first record of every (dataset_id, aphiaID) pair is turned into a result
     * item. Any failure is logged and reported to the writer as a
     * {@link StreamBlockingException}. A blank search query produces no results.
     *
     * @param writer destination of the result items
     * @param limit  limit passed to the paged query object
     */
    public void search(ObjectWriter<ResultItem> writer, int limit) {
        if (searchQuery == null || searchQuery.isEmpty()) {
            // Avoid sending a query without a scientific name filter.
            log.warn("empty search query, no result item will be produced");
            return;
        }

        PagedQueryObject queryObject = new PagedQueryObject(baseURL, ResultType.Occurrence, limit);
        queryObject.setConditions(QueryCondition.cond("scientificname", searchQuery));
        queryObject.getConditions().addAll(this.queryConditions);

        try {
            PagedQueryIterator<ResultItem> pagedIterator = new PagedQueryIterator<ResultItem>(queryObject) {

                /** Keys of the (dataset, taxon) pairs already handed out. */
                private final Set<String> alreadyVisited = new HashSet<String>();

                @Override
                protected ResultItem getObject(Map<String, Object> mappedObject) throws Exception {
                    ResultItem resultItem = buildResult(mappedObject);
                    log.debug("ResultItem: {}", resultItem);
                    return resultItem;
                }

                @Override
                protected boolean useIt(Map<String, Object> mappedObject) {
                    // String.valueOf instead of hard casts: the key only needs a stable textual
                    // form, whatever Java type the JSON mapping produced for the two fields.
                    String key = String.valueOf(mappedObject.get("dataset_id")) // resourceID
                            + "|" + String.valueOf(mappedObject.get("aphiaID")); // obisID
                    // Set.add returns false when the key was already present.
                    return alreadyVisited.add(key);
                }
            };

            while (pagedIterator.hasNext() && writer.isAlive()) {
                writer.write(pagedIterator.next());
            }
        } catch (Exception e) {
            log.error("error writing resultItems", e);
            writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME));
        }
    }

    /**
     * Builds the result item for one occurrence record.
     *
     * @param singleObject the occurrence record as a key/value map
     * @return the populated result item
     * @throws IllegalStateException if the record carries no {@code aphiaID}
     * @throws Exception             if any of the follow-up queries fails
     */
    ResultItem buildResult(Map<String, Object> singleObject) throws Exception {
        long start = System.currentTimeMillis();

        Integer taxonId = MappingUtils.getAsInteger(singleObject, "aphiaID"); // obisID
        if (taxonId == null) {
            // Fail with a readable message instead of a bare NullPointerException below.
            throw new IllegalStateException("occurrence record without aphiaID: " + singleObject);
        }
        String taxonKey = taxonId.toString();

        ResultItem resItem = new ResultItem(taxonKey, MappingUtils.getAsString(singleObject, "scientificName"));
        resItem.setScientificNameAuthorship(MappingUtils.getAsString(singleObject, "scientificNameAuthorship"));
        resItem.setLsid(MappingUtils.getAsString(singleObject, "scientificNameID"));

        Map<String, Object> taxonData = queryTaxon(taxonKey);
        log.debug("Retrieved taxon: {}", taxonData);
        Map<String, Object> taxonRecord = firstResult(taxonData);
        if (taxonRecord != null) {
            String taxonRank = MappingUtils.getAsString(taxonRecord, "taxonRank");
            resItem.setRank(taxonRank); // rank_name
            resItem.setParent(retrieveParentTaxon(taxonId, taxonRank, taxonRecord)); // parent_id
        }

        DataSet dataset = DataSetRetreiver.get(baseURL, MappingUtils.getAsString(singleObject, "dataset_id"));
        resItem.setProvider(dataset.getDataProvider().getName());
        resItem.setDataSet(dataset);
        resItem.setProducts(retrieveProducts(taxonKey, dataset));

        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call
        // instead of a shared static one.
        String accessDate = new SimpleDateFormat(CREDITS_DATE_PATTERN).format(Calendar.getInstance().getTime());
        resItem.setCredits("Biodiversity occurrence accessed through OBIS WebService, https://api.obis.org/v3/, "
                + accessDate);

        log.trace("[Benchmark] time to retrieve ResultItem is {}", System.currentTimeMillis() - start);
        log.debug("found species: id={}, name={}", resItem.getId(), resItem.getScientificName());
        return resItem;
    }

    /**
     * Walks up the taxonomy starting from the given taxon and returns its closest parent,
     * with every further ancestor linked through {@link Taxon#setParent}.
     *
     * <p>The walk stops when the current rank is not a known {@link TaxonCategories} value,
     * when the current record holds no id for any higher rank, or when the query for the
     * parent returns no result.
     *
     * @param taxonId   id of the starting taxon; {@code 0} means "no parent"
     * @param taxonRank rank of the starting taxon
     * @param taxonMap  record of the starting taxon, expected to hold {@code <rank>id} entries
     * @return the closest parent, or {@code null} if none could be resolved
     * @throws Exception if a parent query fails
     */
    private Taxon retrieveParentTaxon(Integer taxonId, String taxonRank, Map<String, Object> taxonMap)
            throws Exception {
        log.debug("Call retrieve parentTaxon: [taxonId={}, taxonRank={}]", taxonId, taxonRank);
        if (taxonId == 0) {
            return null;
        }
        long start = System.currentTimeMillis();

        Taxon closestParent = null; // head of the chain, this is what gets returned
        Taxon lastLinked = null;    // tail of the chain, the next ancestor is attached here
        String currentRank = taxonRank;
        Map<String, Object> currentRecord = taxonMap;

        while (true) {
            TaxonCategories category = TaxonCategories.getTaxonCategory(currentRank);
            if (category == null) {
                break;
            }
            Integer parentId = findNearestAncestorId(category, currentRecord);
            if (parentId == null) {
                break;
            }
            log.debug("Found parentId: {}", parentId);

            Map<String, Object> parentTaxonData = queryTaxon(parentId.toString());
            log.debug("ParentTaxon: {}", parentTaxonData);
            Map<String, Object> parentRecord = firstResult(parentTaxonData);
            if (parentRecord == null) {
                break;
            }

            // The next iteration continues the climb from the record just fetched.
            currentRecord = parentRecord;
            Taxon parent = new Taxon(MappingUtils.getAsInteger(currentRecord, "taxonID").toString(),
                    MappingUtils.getAsString(currentRecord, "scientificName"));
            parent.setScientificNameAuthorship(MappingUtils.getAsString(currentRecord, "scientificNameAuthorship"));
            currentRank = MappingUtils.getAsString(currentRecord, "taxonRank");
            parent.setRank(currentRank); // rank_name

            if (closestParent == null) {
                closestParent = parent;
            } else {
                lastLinked.setParent(parent);
            }
            lastLinked = parent;
        }

        log.trace("[Benchmark] time to retrieve taxon is {}", System.currentTimeMillis() - start);
        return closestParent;
    }

    /**
     * Returns the id stored in {@code record} for the nearest rank above {@code category},
     * looking for keys of the form {@code <lower-case rank name>id}.
     *
     * @return the id, or {@code null} when no higher rank has a usable id in the record
     */
    private static Integer findNearestAncestorId(TaxonCategories category, Map<String, Object> record)
            throws Exception {
        TaxonCategories ancestor = TaxonCategories.getParent(category);
        while (ancestor != null) {
            // Locale.ROOT keeps the key stable regardless of the JVM default locale.
            String idKey = ancestor.name().toLowerCase(Locale.ROOT) + "id";
            if (record.containsKey(idKey)) {
                Integer ancestorId = MappingUtils.getAsInteger(record, idKey);
                if (ancestorId != null) {
                    return ancestorId;
                }
                // Key present but no usable value: keep climbing instead of failing.
            }
            ancestor = TaxonCategories.getParent(ancestor);
        }
        return null;
    }

    /** Queries the taxon with the given id and returns the raw mapped response. */
    private Map<String, Object> queryTaxon(String taxonId) throws Exception {
        QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId, QueryType.Taxon);
        return MappingUtils.getObjectMapping(query.build());
    }

    /**
     * Extracts the first element of the {@code results} list of a mapped response.
     *
     * @return the first result, or {@code null} if the response is null/empty or has no results
     */
    private static Map<String, Object> firstResult(Map<String, Object> response) {
        if (response == null || response.isEmpty()) {
            return null;
        }
        @SuppressWarnings("unchecked")
        List<Map<String, Object>> results = (List<Map<String, Object>>) response.get("results");
        if (results == null || results.isEmpty()) {
            return null;
        }
        return results.get(0);
    }

    /**
     * Builds the single occurrence product of a (taxon, dataset) pair, counting the
     * occurrences that match the pair and the search conditions.
     *
     * @param taxonId id of the taxon
     * @param dataset dataset the occurrences belong to
     * @return a one-element list holding the occurrence product
     * @throws Exception if the count query fails
     */
    private List<Product> retrieveProducts(String taxonId, DataSet dataset) throws Exception {
        long start = System.currentTimeMillis();

        QueryCount occurrencesQuery = new QueryCount(baseURL, ResultType.Occurrence);
        occurrencesQuery.setConditions(QueryCondition.cond("taxonid", taxonId),
                QueryCondition.cond("datasetid", dataset.getId()));
        occurrencesQuery.getConditions().addAll(this.queryConditions);

        String productId = Utils.createProductsKey(Utils.getDataSetAsString(dataset), taxonId, this.queryConditions);
        Product product = new Product(ProductType.Occurrence, productId);
        product.setCount(occurrencesQuery.getCount());

        log.trace("[Benchmark] time to retrieve product is {}", System.currentTimeMillis() - start);
        return Arrays.asList(product);
    }
}