gbif-spd-plugin/src/main/java/org/gcube/data/spd/gbifplugin/search/ResultItemSearch.java

160 lines
6.3 KiB
Java

package org.gcube.data.spd.gbifplugin.search;
import static org.gcube.data.spd.gbifplugin.search.query.MappingUtils.getAsString;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.gcube.data.spd.gbifplugin.Constants;
import org.gcube.data.spd.gbifplugin.search.query.MappingUtils;
import org.gcube.data.spd.gbifplugin.search.query.PagedQueryIterator;
import org.gcube.data.spd.gbifplugin.search.query.PagedQueryObject;
import org.gcube.data.spd.gbifplugin.search.query.QueryByIdentifier;
import org.gcube.data.spd.gbifplugin.search.query.QueryCondition;
import org.gcube.data.spd.gbifplugin.search.query.QueryCount;
import org.gcube.data.spd.gbifplugin.search.query.QueryType;
import org.gcube.data.spd.gbifplugin.search.query.ResultType;
import org.gcube.data.spd.model.Condition;
import org.gcube.data.spd.model.exceptions.StreamBlockingException;
import org.gcube.data.spd.model.products.DataSet;
import org.gcube.data.spd.model.products.Product;
import org.gcube.data.spd.model.products.Product.ProductType;
import org.gcube.data.spd.model.products.ResultItem;
import org.gcube.data.spd.model.products.Taxon;
import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ResultItemSearch {
private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
private static Logger log = LoggerFactory.getLogger(ResultItemSearch.class);
private List<QueryCondition> queryConditions = new ArrayList<QueryCondition>();
private String baseURL;
String searchQuery;
public ResultItemSearch(String baseURL, String searchQuery, Condition ... conditions){
this.baseURL = baseURL;
this.searchQuery = searchQuery.replaceAll(" ", "%20");
try{
this.queryConditions = Utils.elaborateConditions(conditions);
}catch(Exception e){
log.error("error elaborating conditions",e);
}
}
public void search(ObjectWriter<ResultItem> writer, int limit){
PagedQueryObject queryObject = new PagedQueryObject(baseURL, ResultType.Occurrence,limit);
queryObject.setConditions(QueryCondition.cond("scientificName",searchQuery));
try{
PagedQueryIterator<ResultItem> pagedIterator = new PagedQueryIterator<ResultItem>(queryObject) {
Set<String> alreadyVisited =new HashSet<String>();
@Override
protected ResultItem getObject(Map<String, Object> mappedObject) throws Exception {
return buildResult(mappedObject);
}
@Override
protected boolean useIt(Map<String, Object> mappedObject) {
String datasetKey = getAsString(mappedObject,"datasetKey");
Integer taxonId = (Integer)mappedObject.get("taxonKey");
String key = datasetKey+"|"+taxonId;
if (alreadyVisited.contains(key))
return false;
alreadyVisited.add(key);
return true;
}
};
while (pagedIterator.hasNext() && writer.isAlive())
writer.write(pagedIterator.next());
}catch(Exception e){
log.error("error writing resultItems",e);
writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME));
}
}
ResultItem buildResult(Map<String,Object> singleObject) throws Exception{
long start = System.currentTimeMillis();
Integer taxonId = (Integer)singleObject.get("taxonKey");
String scientificName = getAsString(singleObject,"species");
ResultItem resItem = new ResultItem(taxonId.toString(), scientificName );
resItem.setParent(retrieveTaxon(taxonId.toString()));
resItem.setScientificNameAuthorship(retrieveAuthorship(taxonId.toString()));
resItem.setRank(getAsString(singleObject, "taxonRank"));
resItem.setCitation(getAsString(singleObject,"institutionCode"));
DataSet dataset = DataSetRetreiver.get(getAsString(singleObject,"datasetKey"), baseURL);
resItem.setDataSet(dataset);
List<Product> products = retrieveProducts(taxonId.toString(), dataset);
resItem.setProducts(products);
String credits = "Biodiversity occurrence data published by: "+dataset.getDataProvider().getName()+" (Accessed through GBIF Data Portal, data.gbif.org, "+format.format(Calendar.getInstance().getTime())+")";
resItem.setCredits(credits);
log.trace("[Benchmark] time to retrieve ResultItem is "+(System.currentTimeMillis()-start));
return resItem;
}
private String retrieveAuthorship(String taxonId) throws Exception {
QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId, QueryType.Taxon);
Map<String, Object> mapping = MappingUtils.getObjectMapping(query.build());
if (mapping.containsKey("authorship"))
return getAsString(mapping, "authorship");
else return "";
}
private Taxon retrieveTaxon(String taxonId) throws Exception {
long start = System.currentTimeMillis();
QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId, QueryType.Taxon);
query.addPath("parents");
LinkedList<HashMap<String, Object>> parentsList = MappingUtils.getObjectList(query.build());
Taxon parentTaxon = null;
for(HashMap<String, Object> mappedObject : parentsList){
Taxon taxon = new Taxon(((Integer)mappedObject.get("key")).toString(), getAsString(mappedObject, "scientificName"));
taxon.setCitation(getAsString(mappedObject, "accordingTo"));
taxon.setRank(getAsString(mappedObject, "rank"));
if (parentTaxon!=null)
taxon.setParent(parentTaxon);
parentTaxon = taxon;
}
log.trace("[Benchmark] time to retrieve taxon is "+(System.currentTimeMillis()-start));
return parentTaxon;
}
private List<Product> retrieveProducts( String taxonId, DataSet dataset, Condition ... properties) throws Exception{
long start = System.currentTimeMillis();
QueryCount occurrencesQuery = new QueryCount(baseURL, ResultType.Occurrence);
occurrencesQuery.setConditions(QueryCondition.cond("taxonKey",taxonId), QueryCondition.cond("datasetKey", dataset.getId()), QueryCondition.cond("hasCoordinate","true"));
String productId = Utils.createProductsKey(Utils.getDataSetAsString(dataset), taxonId, this.queryConditions);
Product product = new Product(ProductType.Occurrence, productId);
product.setCount(occurrencesQuery.getCount());
log.trace("[Benchmark] time to retrieve product is "+(System.currentTimeMillis()-start));
return Arrays.asList(product);
}
}