package org.gcube.data.spd.obisplugin.search; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.gcube.data.spd.model.Condition; import org.gcube.data.spd.model.exceptions.StreamBlockingException; import org.gcube.data.spd.model.products.DataSet; import org.gcube.data.spd.model.products.Product; import org.gcube.data.spd.model.products.Product.ProductType; import org.gcube.data.spd.model.products.ResultItem; import org.gcube.data.spd.model.products.Taxon; import org.gcube.data.spd.obisplugin.Constants; import org.gcube.data.spd.obisplugin.search.query.MappingUtils; import org.gcube.data.spd.obisplugin.search.query.PagedQueryIterator; import org.gcube.data.spd.obisplugin.search.query.PagedQueryObject; import org.gcube.data.spd.obisplugin.search.query.QueryByIdentifier; import org.gcube.data.spd.obisplugin.search.query.QueryCondition; import org.gcube.data.spd.obisplugin.search.query.QueryCount; import org.gcube.data.spd.obisplugin.search.query.QueryType; import org.gcube.data.spd.obisplugin.search.query.ResultType; import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class ResultItemSearch { private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); private static Logger log = LoggerFactory.getLogger(ResultItemSearch.class); private List queryConditions = new ArrayList(); private String baseURL; String searchQuery; public ResultItemSearch(String baseURL, String searchQuery, Condition... conditions) { this.baseURL = baseURL; this.searchQuery = searchQuery.replaceAll(" ", "%20").trim(); this.searchQuery = this.searchQuery.substring(0, 1).toUpperCase() + this.searchQuery.substring(1, this.searchQuery.length()).toLowerCase(); try { this.queryConditions = Utils.elaborateConditions(conditions); } catch (Exception e) { log.error("error elaborating conditions", e); } } public void search(ObjectWriter writer, int limit) { PagedQueryObject queryObject = new PagedQueryObject(baseURL, ResultType.Occurrence, limit); queryObject.setConditions(QueryCondition.cond("scientificname", searchQuery)); queryObject.getConditions().addAll(this.queryConditions); try { PagedQueryIterator pagedIterator = new PagedQueryIterator(queryObject) { Set alreadyVisited = new HashSet(); @Override protected ResultItem getObject(Map mappedObject) throws Exception { ResultItem resultItem = buildResult(mappedObject); log.debug("ResultItem: " + resultItem); return resultItem; } @Override protected boolean useIt(Map mappedObject) { String datasetKey = (String) mappedObject.get("dataset_id");// resourceID Integer taxonId = (Integer) mappedObject.get("aphiaID");// obisID String key = datasetKey + "|" + taxonId; if (alreadyVisited.contains(key)) return false; alreadyVisited.add(key); return true; } }; while (pagedIterator.hasNext() && writer.isAlive()) writer.write(pagedIterator.next()); } catch (Exception e) { log.error("error writing resultItems", e); writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME)); } } ResultItem buildResult(Map singleObject) throws Exception { try { long start = System.currentTimeMillis(); // log.debug("Retrieved Occurence: "+singleObject); Integer taxonId = MappingUtils.getAsInteger(singleObject, "aphiaID");// obisID String scientificName = MappingUtils.getAsString(singleObject, "scientificName"); ResultItem resItem = new ResultItem(taxonId.toString(), scientificName); String scientificNameAuthorship = MappingUtils.getAsString(singleObject, "scientificNameAuthorship"); resItem.setScientificNameAuthorship(scientificNameAuthorship); resItem.setLsid(MappingUtils.getAsString(singleObject,"scientificNameID")); QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId.toString(), QueryType.Taxon); Map taxonData = MappingUtils.getObjectMapping(query.build()); log.debug("Retrieved taxon: " + taxonData); if (taxonData != null && !taxonData.isEmpty()) { @SuppressWarnings("unchecked") List> taxonList = (List>) taxonData.get("results"); if (taxonList != null && !taxonList.isEmpty()) { Map taxonSingle = taxonList.get(0); String taxonRank = MappingUtils.getAsString(taxonSingle, "taxonRank"); resItem.setRank(taxonRank);// rank_name resItem.setParent(retrieveParentTaxon(taxonId, taxonRank, taxonSingle));// parent_id } } DataSet dataset = DataSetRetreiver.get(baseURL, MappingUtils.getAsString(singleObject, "dataset_id")); resItem.setProvider(dataset.getDataProvider().getName()); resItem.setDataSet(dataset); List products = retrieveProducts(taxonId.toString(), dataset); resItem.setProducts(products); String credits = "Biodiversity occurrence accessed through OBIS WebService, https://api.obis.org/v3/, " + format.format(Calendar.getInstance().getTime()) + ")"; resItem.setCredits(credits); log.trace("[Benchmark] time to retrieve ResultItem is " + (System.currentTimeMillis() - start)); log.debug("found species: id={}, name={}", resItem.getId(), resItem.getScientificName()); return resItem; } catch (Exception e) { throw e; } } private Taxon retrieveParentTaxon(Integer taxonId, String taxonRank, Map taxonMap) throws Exception { log.debug("Call retrieve parentTaxon: [taxonId={}, taxonRank={}]",taxonId,taxonRank); if (taxonId == 0) return null; long start = System.currentTimeMillis(); LinkedList taxons = new LinkedList<>(); String searchTaxon = taxonRank; Map searchMap = taxonMap; Taxon previousTaxon = null; Taxon taxonToReturn = null; TaxonCategories currentTaxon=null; boolean end = false; while (!end) { currentTaxon = TaxonCategories.getTaxonCategory(searchTaxon); if (currentTaxon == null) { break; } boolean foundParentId = false; Integer parentId=null; while (!foundParentId&&!end) { TaxonCategories parentTaxon = TaxonCategories.getParent(currentTaxon); if (parentTaxon == null) { end=true; break; } String parentIdKey = parentTaxon.name().toLowerCase() + "id"; if (searchMap.containsKey(parentIdKey)) { parentId = MappingUtils.getAsInteger(searchMap, parentIdKey); currentTaxon=parentTaxon; foundParentId=true; }else { currentTaxon=parentTaxon; } } if(end||!foundParentId){ break; } log.debug("Found parentId: "+parentId); QueryByIdentifier query = new QueryByIdentifier(baseURL, parentId.toString(), QueryType.Taxon); Map parentTaxonData = MappingUtils.getObjectMapping(query.build()); log.debug("ParentTaxon: " + parentTaxonData); if (parentTaxonData != null && !parentTaxonData.isEmpty()) { @SuppressWarnings("unchecked") List> parentTaxonList = (List>) parentTaxonData.get("results"); if (parentTaxonList != null && !parentTaxonList.isEmpty()) { searchMap = parentTaxonList.get(0); Taxon taxon = new Taxon(MappingUtils.getAsInteger(searchMap, "taxonID").toString(), MappingUtils.getAsString(searchMap, "scientificName")); taxon.setScientificNameAuthorship(MappingUtils.getAsString(searchMap, "scientificNameAuthorship")); searchTaxon=MappingUtils.getAsString(searchMap, "taxonRank"); taxon.setRank(searchTaxon);// rank_name if (previousTaxon == null) { previousTaxon = taxon; } else { previousTaxon.setParent(taxon); taxons.add(previousTaxon); previousTaxon = taxon; } } else { end = true; } } else { end = true; } } if (taxons.isEmpty()) { taxonToReturn = previousTaxon; } else { taxonToReturn = taxons.getFirst(); } log.trace("[Benchmark] time to retrieve taxon is " + (System.currentTimeMillis() - start)); return taxonToReturn; } private List retrieveProducts(String taxonId, DataSet dataset) throws Exception { long start = System.currentTimeMillis(); QueryCount occurrencesQuery = new QueryCount(baseURL, ResultType.Occurrence); occurrencesQuery.setConditions(QueryCondition.cond("taxonid", taxonId), QueryCondition.cond("datasetid", dataset.getId())); occurrencesQuery.getConditions().addAll(this.queryConditions); String productId = Utils.createProductsKey(Utils.getDataSetAsString(dataset), taxonId, this.queryConditions); Product product = new Product(ProductType.Occurrence, productId); product.setCount(occurrencesQuery.getCount()); log.trace("[Benchmark] time to retrieve product is " + (System.currentTimeMillis() - start)); return Arrays.asList(product); } }