From 6bab24463237f53dfeeea1cd6d88113e79d5bf54 Mon Sep 17 00:00:00 2001 From: Lucio Lelii Date: Thu, 2 Feb 2017 10:26:44 +0000 Subject: [PATCH] release 4.3 git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/branches/data-access/gbif-spd-plugin/1.9@142049 82a268e6-3cf1-43bd-a215-b396298e98cf --- .classpath | 10 + .project | 23 ++ .settings/org.eclipse.core.resources.prefs | 7 + .settings/org.eclipse.jdt.core.prefs | 6 + .settings/org.eclipse.m2e.core.prefs | 5 + distro/LICENSE | 1 + distro/README | 61 +++++ distro/changelog.xml | 30 +++ distro/descriptor.xml | 31 +++ distro/profile.xml | 28 +++ pom.xml | 149 ++++++++++++ .../gcube/data/spd/gbifplugin/Constants.java | 22 ++ .../gcube/data/spd/gbifplugin/GBIFPlugin.java | 84 +++++++ .../OccurrencesCapabilityImpl.java | 110 +++++++++ .../gbifplugin/search/DataSetRetreiver.java | 43 ++++ .../gbifplugin/search/OccurrenceSearch.java | 213 ++++++++++++++++++ .../spd/gbifplugin/search/ProductKey.java | 20 ++ .../gbifplugin/search/ResultItemSearch.java | 159 +++++++++++++ .../data/spd/gbifplugin/search/Utils.java | 136 +++++++++++ .../gbifplugin/search/query/MappingUtils.java | 85 +++++++ .../search/query/PagedQueryIterator.java | 85 +++++++ .../search/query/PagedQueryObject.java | 47 ++++ .../search/query/QueryByIdentifier.java | 35 +++ .../search/query/QueryCondition.java | 20 ++ .../gbifplugin/search/query/QueryCount.java | 54 +++++ .../gbifplugin/search/query/QueryType.java | 17 ++ .../gbifplugin/search/query/ResultType.java | 15 ++ ...g.gcube.data.spd.plugin.fwk.AbstractPlugin | 1 + .../org/gcube/data/spd/gbif/QueryTest.java | 139 ++++++++++++ src/test/resources/log4j.properties | 10 + 30 files changed, 1646 insertions(+) create mode 100644 .classpath create mode 100644 .project create mode 100644 .settings/org.eclipse.core.resources.prefs create mode 100644 .settings/org.eclipse.jdt.core.prefs create mode 100644 .settings/org.eclipse.m2e.core.prefs create mode 100644 distro/LICENSE create mode 
100644 distro/README create mode 100644 distro/changelog.xml create mode 100644 distro/descriptor.xml create mode 100644 distro/profile.xml create mode 100644 pom.xml create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/Constants.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/GBIFPlugin.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/capabilities/OccurrencesCapabilityImpl.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/DataSetRetreiver.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/OccurrenceSearch.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/ProductKey.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/ResultItemSearch.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/Utils.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/MappingUtils.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryIterator.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryObject.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryByIdentifier.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCondition.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCount.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryType.java create mode 100644 src/main/java/org/gcube/data/spd/gbifplugin/search/query/ResultType.java create mode 100644 src/main/resources/META-INF/services/org.gcube.data.spd.plugin.fwk.AbstractPlugin create mode 100644 src/test/java/org/gcube/data/spd/gbif/QueryTest.java create mode 100644 src/test/resources/log4j.properties diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..517bf32 --- /dev/null +++ b/.classpath @@ -0,0 +1,10 @@ + + + 
+ + + + + + + diff --git a/.project b/.project new file mode 100644 index 0000000..f1a5a34 --- /dev/null +++ b/.project @@ -0,0 +1,23 @@ + + + gbif-plugin + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..ad6cf5d --- /dev/null +++ b/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,7 @@ +#Tue Feb 14 11:06:25 CET 2012 +eclipse.preferences.version=1 +encoding//src/main/java=UTF-8 +encoding//src/main/resources=UTF-8 +encoding//src/test/java=UTF-8 +encoding//src/test/resources=UTF-8 +encoding/=UTF-8 diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..5d06b25 --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,6 @@ +#Tue Feb 14 11:06:25 CET 2012 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 +org.eclipse.jdt.core.compiler.compliance=1.6 +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.source=1.6 diff --git a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000..7a7b954 --- /dev/null +++ b/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,5 @@ +#Mon Feb 13 12:19:08 CET 2012 +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/distro/LICENSE b/distro/LICENSE new file mode 100644 index 0000000..3695e26 --- /dev/null +++ b/distro/LICENSE @@ -0,0 +1 @@ +${gcube.license} diff --git a/distro/README b/distro/README new file mode 100644 index 0000000..edf22f4 --- /dev/null +++ b/distro/README @@ -0,0 +1,61 @@ +The gCube System - ${name} +-------------------------------------------------- + +${description} + + +${gcube.description} + 
+${gcube.funding} + + +Version +-------------------------------------------------- + +${version} (${buildDate}) + +Please see the file named "changelog.xml" in this directory for the release notes. + + +Authors +-------------------------------------------------- + +* Lucio Lelii (lucio.lelii-AT-isti.cnr.it), CNR, Italy + +MAINTAINERS +-------------------------------------------------- + +* Lucio Lelii (lucio.lelii-AT-isti.cnr.it), CNR, Italy + +Download information +-------------------------------------------------- + +Source code is available from SVN: + ${scm.url} + +Binaries can be downloaded from the gCube website: + ${gcube.website} + + +Installation +-------------------------------------------------- + +Installation documentation is available on-line in the gCube Wiki: + ${gcube.wikiRoot} + +Documentation +-------------------------------------------------- + +Documentation is available on-line in the gCube Wiki: + ${gcube.wikiRoot} + +Support +-------------------------------------------------- + +Bugs and support requests can be reported in the gCube issue tracking tool: + ${gcube.issueTracking} + + +Licensing +-------------------------------------------------- + +This software is licensed under the terms you may find in the file named "LICENSE" in this directory. 
diff --git a/distro/changelog.xml b/distro/changelog.xml new file mode 100644 index 0000000..6a20654 --- /dev/null +++ b/distro/changelog.xml @@ -0,0 +1,30 @@ + + + GBIF plugin for species product discovery first release + + + bug fixing + + + info for repository added + added more credits on occurrences + + + porting to the spd fwk 1.2.0 + + + porting to the spd fwk 2.0.0 + + + porting to the spd fwk 2.2.0 + + + Sync with spd-model-library changes + + + new matching elements for basis of record added + + + gCore dependencies removed + + \ No newline at end of file diff --git a/distro/descriptor.xml b/distro/descriptor.xml new file mode 100644 index 0000000..67b0c2c --- /dev/null +++ b/distro/descriptor.xml @@ -0,0 +1,31 @@ + + servicearchive + + tar.gz + + / + + + ${distroDirectory} + / + true + + README + LICENSE + profile.xml + changelog.xml + + 755 + true + + + + + target/${build.finalName}.jar + /${artifactId} + + + \ No newline at end of file diff --git a/distro/profile.xml b/distro/profile.xml new file mode 100644 index 0000000..62b13c8 --- /dev/null +++ b/distro/profile.xml @@ -0,0 +1,28 @@ + + + + Service + + ${description} + DataAccess + ${artifactId} + 1.0.0 + + + ${description} + ${artifactId} + ${version} + + ${groupId} + ${artifactId} + ${version} + + library + + ${build.finalName}.jar + + + + + + diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..8362ed5 --- /dev/null +++ b/pom.xml @@ -0,0 +1,149 @@ + + 4.0.0 + + + maven-parent + org.gcube.tools + 1.0.0 + + + + org.gcube.data.spd + gbif-spd-plugin + 1.9.0-SNAPSHOT + GBIFPlugin + gbif plugin for species manager service + + + ${project.basedir}/distro + 2.4.4 + + + + + + org.gcube.distribution + gcube-bom + LATEST + pom + import + + + + + + + + + org.projectlombok + lombok + 1.16.2 + provided + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + com.fasterxml.jackson.core + 
jackson-core + ${jackson.version} + + + + com.sun.jersey + jersey-client + 1.19 + + + + org.gcube.data.spd + spd-plugin-framework + [3.0.0-SNAPSHOT, 4.0.0-SNAPSHOT) + + + + + junit + junit + 4.11 + test + + + + ch.qos.logback + logback-classic + 1.0.13 + test + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.7 + 1.7 + 1.7 + + + + org.apache.maven.plugins + maven-resources-plugin + 2.5 + + + copy-profile + install + + copy-resources + + + target + + + ${distroDirectory} + true + + profile.xml + + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.2 + + + ${distroDirectory}/descriptor.xml + + + + + servicearchive + install + + single + + + + + + + \ No newline at end of file diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/Constants.java b/src/main/java/org/gcube/data/spd/gbifplugin/Constants.java new file mode 100644 index 0000000..ac1711b --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/Constants.java @@ -0,0 +1,22 @@ +package org.gcube.data.spd.gbifplugin; + +import javax.xml.namespace.QName; + +public class Constants { + + //public static final String BASE_URL = "http://api.gbif.org/v1"; + + public static final int QUERY_LIMIT = 200; + + public static final QName GBIFKEY_ATTR= new QName("gbifKey"); + public static final QName ABOUT_ATTR= new QName("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "about"); + public static final QName RESOURCE_ATTR= new QName("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "resource"); + public static final QName TOTAL_MATCHED_ATTR = new QName("totalMatched"); + + + + public static final String CHILD_RELATIONSHIP_VALUE = "http://rs.tdwg.org/ontology/voc/TaxonConcept#IsChildTaxonOf"; + + public static final String REPOSITORY_NAME="GBIF"; + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/GBIFPlugin.java b/src/main/java/org/gcube/data/spd/gbifplugin/GBIFPlugin.java new file mode 100644 index 0000000..655acd2 --- /dev/null +++ 
b/src/main/java/org/gcube/data/spd/gbifplugin/GBIFPlugin.java @@ -0,0 +1,84 @@ +package org.gcube.data.spd.gbifplugin; + +import java.util.Collections; +import java.util.Set; + +import org.gcube.common.resources.gcore.ServiceEndpoint; +import org.gcube.data.spd.gbifplugin.capabilities.OccurrencesCapabilityImpl; +import org.gcube.data.spd.gbifplugin.search.ResultItemSearch; +import org.gcube.data.spd.model.Condition; +import org.gcube.data.spd.model.RepositoryInfo; +import org.gcube.data.spd.model.exceptions.StreamBlockingException; +import org.gcube.data.spd.model.products.ResultItem; +import org.gcube.data.spd.model.util.Capabilities; +import org.gcube.data.spd.plugin.fwk.AbstractPlugin; +import org.gcube.data.spd.plugin.fwk.capabilities.OccurrencesCapability; +import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class GBIFPlugin extends AbstractPlugin{ + + private static Logger logger = LoggerFactory.getLogger(GBIFPlugin.class); + + + private OccurrencesCapability occurrencesCapability; + private String baseURL; + + + @Override + public void initialize(ServiceEndpoint resource) throws Exception { + baseURL = resource.profile().accessPoints().iterator().next().address(); + occurrencesCapability = new OccurrencesCapabilityImpl(baseURL); + setUseCache(true); + super.initialize(resource); + } + + @Override + public Set getSupportedCapabilities() { + return Collections.singleton(Capabilities.Occurrence); + } + + + + + @Override + public OccurrencesCapability getOccurrencesInterface() { + return occurrencesCapability; + } + + @Override + public String getRepositoryName() { + return "GBIF"; + } + + @Override + public String getDescription() { + return "A plugin for GBIF interaction"; + } + + @Override + public void searchByScientificName(String word, ObjectWriter writer, + Condition... 
properties) { + logger.debug("starting the search for gbifPlugin with word "+word); + try { + new ResultItemSearch(baseURL, word, properties).search(writer,Constants.QUERY_LIMIT); + } catch (Exception e) { + logger.debug("searchByScientificName failed",e); + writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME, word)); + } + } + + + @Override + public RepositoryInfo getRepositoryInfo() { + return new RepositoryInfo( + "http://www.gbif.org/fileadmin/templates/main/images/logo_leaf.gif", + "http://www.gbif.org/", + "The Global Biodiversity Information Facility (GBIF) was established by governments in 2001 to encourage free and open access to biodiversity data, " + + "via the Internet. Through a global network of countries and organizations, GBIF promotes and facilitates the mobilization, access, " + + "discovery and use of information about the occurrence of organisms over time and across the planet"); + } + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/capabilities/OccurrencesCapabilityImpl.java b/src/main/java/org/gcube/data/spd/gbifplugin/capabilities/OccurrencesCapabilityImpl.java new file mode 100644 index 0000000..2d413f5 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/capabilities/OccurrencesCapabilityImpl.java @@ -0,0 +1,110 @@ +package org.gcube.data.spd.gbifplugin.capabilities; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.gcube.data.spd.gbifplugin.Constants; +import org.gcube.data.spd.gbifplugin.search.OccurrenceSearch; +import org.gcube.data.spd.model.Condition; +import org.gcube.data.spd.model.Conditions; +import org.gcube.data.spd.model.exceptions.StreamBlockingException; +import org.gcube.data.spd.model.exceptions.StreamNonBlockingException; +import org.gcube.data.spd.model.products.OccurrencePoint; +import org.gcube.data.spd.plugin.fwk.capabilities.OccurrencesCapability; +import org.gcube.data.spd.plugin.fwk.writers.ClosableWriter; +import 
org.gcube.data.spd.plugin.fwk.writers.ObjectWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class OccurrencesCapabilityImpl extends OccurrencesCapability{ + + + private static Logger logger = LoggerFactory.getLogger(OccurrencesCapabilityImpl.class); + + private String baseUrl; + + + + public OccurrencesCapabilityImpl(String baseUrl) { + super(); + this.baseUrl = baseUrl; + } + + + + @SuppressWarnings("serial") + @Override + public Set getSupportedProperties() { + return new HashSet(){{ + add(Conditions.DATE); + add(Conditions.COORDINATE); + + }}; + } + + + + @Override + public void searchByScientificName(String word, + ObjectWriter writer, Condition... properties) { + try{ + new OccurrenceSearch(baseUrl).search(writer, word, Constants.QUERY_LIMIT, properties); + } catch (Exception e) { + logger.debug("search occurrences by ScientificName failed",e); + } + } + + @Override + public void getOccurrencesByProductKeys( + ClosableWriter writer, Iterator keys) { + OccurrenceSearch occSearch = null; + try{ + occSearch = new OccurrenceSearch(baseUrl); + }catch (Exception e) { + logger.error("error contacting gbif server"); + writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME)); + return; + } + while (keys.hasNext()){ + String key = keys.next(); + try{ + occSearch.searchByKey(writer, key, Constants.QUERY_LIMIT); + }catch (Exception e) { + logger.warn("error retrieving key "+key, e); + writer.write(new StreamNonBlockingException(Constants.REPOSITORY_NAME,key)); + } + } + writer.close(); + } + + + + @Override + public void getOccurrencesByIds(ClosableWriter writer, + Iterator ids) { + OccurrenceSearch occSearch = null; + try{ + occSearch = new OccurrenceSearch(baseUrl); + }catch (Exception e) { + logger.error("error contacting gbif server"); + writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME)); + return; + } + while (ids.hasNext()){ + String id = ids.next(); + try{ + if (!writer.isAlive()){ + logger.trace("the 
writer is closed"); + return; + }else writer.write(occSearch.searchById(id)); + }catch (Exception e) { + logger.warn("error retrieving id "+id,e); + writer.write(new StreamNonBlockingException(Constants.REPOSITORY_NAME,id)); + } + } + writer.close(); + } + + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/DataSetRetreiver.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/DataSetRetreiver.java new file mode 100644 index 0000000..30231d8 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/DataSetRetreiver.java @@ -0,0 +1,43 @@ +package org.gcube.data.spd.gbifplugin.search; + +import static org.gcube.data.spd.gbifplugin.search.query.MappingUtils.getAsString; + +import java.util.Map; + +import org.gcube.data.spd.gbifplugin.search.query.MappingUtils; +import org.gcube.data.spd.gbifplugin.search.query.QueryByIdentifier; +import org.gcube.data.spd.gbifplugin.search.query.QueryType; +import org.gcube.data.spd.model.products.DataProvider; +import org.gcube.data.spd.model.products.DataSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DataSetRetreiver { + + private static Logger log = LoggerFactory.getLogger(DataSetRetreiver.class); + + @SuppressWarnings("unchecked") + public static DataSet get(String key, String baseURL) throws Exception{ + long start = System.currentTimeMillis(); + QueryByIdentifier datasetQuery = new QueryByIdentifier(baseURL, key, QueryType.Dataset); + Map mapping = MappingUtils.getObjectMapping(datasetQuery.build()); + DataSet dataset = new DataSet(key); + dataset.setName(getAsString(mapping,"title")); + dataset.setCitation(getAsString((Map)mapping.get("citation"),"text")); + String providerKey = getAsString(mapping,"publishingOrganizationKey"); + dataset.setDataProvider(getDataProvider(providerKey, baseURL)); + log.trace("[Benchmark] time to retrieve dataset is "+(System.currentTimeMillis()-start)); + return dataset; + } + + private static DataProvider getDataProvider(String 
key, String baseURL) throws Exception{ + QueryByIdentifier datasetQuery = new QueryByIdentifier(baseURL, key, QueryType.Organization); + Map mapping = MappingUtils.getObjectMapping(datasetQuery.build()); + + DataProvider dataProvider = new DataProvider(key); + dataProvider.setName(getAsString(mapping, "title")); + return dataProvider; + + } + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/OccurrenceSearch.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/OccurrenceSearch.java new file mode 100644 index 0000000..9ed9730 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/OccurrenceSearch.java @@ -0,0 +1,213 @@ +package org.gcube.data.spd.gbifplugin.search; + +import static org.gcube.data.spd.gbifplugin.search.query.MappingUtils.getAsCalendar; +import static org.gcube.data.spd.gbifplugin.search.query.MappingUtils.getAsDouble; +import static org.gcube.data.spd.gbifplugin.search.query.MappingUtils.getAsInteger; +import static org.gcube.data.spd.gbifplugin.search.query.MappingUtils.getAsString; + +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.gcube.data.spd.gbifplugin.Constants; +import org.gcube.data.spd.gbifplugin.search.query.MappingUtils; +import org.gcube.data.spd.gbifplugin.search.query.PagedQueryIterator; +import org.gcube.data.spd.gbifplugin.search.query.PagedQueryObject; +import org.gcube.data.spd.gbifplugin.search.query.QueryByIdentifier; +import org.gcube.data.spd.gbifplugin.search.query.QueryCondition; +import org.gcube.data.spd.gbifplugin.search.query.QueryType; +import org.gcube.data.spd.gbifplugin.search.query.ResultType; +import org.gcube.data.spd.model.BasisOfRecord; +import org.gcube.data.spd.model.Condition; +import org.gcube.data.spd.model.exceptions.StreamBlockingException; +import org.gcube.data.spd.model.products.DataSet; +import 
org.gcube.data.spd.model.products.OccurrencePoint; +import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class OccurrenceSearch { + + private static Logger log = LoggerFactory.getLogger(OccurrenceSearch.class); + + private String baseURL; + + private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); + + public OccurrenceSearch(String baseURL) { + this.baseURL = baseURL; + } + + public void search(ObjectWriter writer, String scientificName, int limit, Condition ...conditions) throws Exception{ + PagedQueryObject occurrencesQuery = new PagedQueryObject(baseURL, ResultType.Occurrence, limit); + List queryConditions = Utils.elaborateConditions(conditions); + occurrencesQuery.setConditions(QueryCondition.cond("scientificName",scientificName.replaceAll(" ", "%20")), QueryCondition.cond("hasCoordinate","true")); + occurrencesQuery.getConditions().addAll(queryConditions); + + writeElements(writer, occurrencesQuery, null); + } + + public void searchByKey(ObjectWriter writer, String key, int limit) throws Exception{ + PagedQueryObject occurrencesQuery = new PagedQueryObject(baseURL, ResultType.Occurrence, limit); + ProductKey productKey = Utils.elaborateProductsKey(key); + occurrencesQuery.getConditions().addAll(productKey.getQueryCondition()); + occurrencesQuery.getConditions().add( QueryCondition.cond("hasCoordinate","true")); + writeElements(writer, occurrencesQuery, productKey.getDataset()); + } + + private void writeElements(ObjectWriter writer, PagedQueryObject occurrencesQuery, final DataSet dataset){ + PagedQueryIterator pagedIterator = new PagedQueryIterator(occurrencesQuery) { + + @Override + protected OccurrencePoint getObject(Map mappedObject) + throws Exception { + OccurrencePoint op = retrieveElement(mappedObject); + if (dataset!=null){ + Calendar now = Calendar.getInstance(); + String credits = "Biodiversity occurrence data published by: 
"+dataset.getDataProvider().getName()+" (Accessed through GBIF Data Portal, data.gbif.org, "+format.format(now.getTime())+")"; + op.setCredits(credits); + op.setDataSet(dataset); + } + return op; + } + + }; + + try{ + while (pagedIterator.hasNext() && writer.isAlive()) + writer.write(pagedIterator.next()); + }catch(Exception e){ + log.error("error writing occurrences",e); + writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME)); + } + } + + public OccurrencePoint searchById(String id) throws Exception{ + QueryByIdentifier queryByIdentifier = new QueryByIdentifier(baseURL, id, QueryType.Occurrence); + return retrieveElement(MappingUtils.getObjectMapping(queryByIdentifier.build())); + } + + + /* + FOSSIL_SPECIMEN + An occurrence record describing a fossilized specimen. + HUMAN_OBSERVATION + An occurrence record describing an observation made by one or more people. + LITERATURE + An occurrence record based on literature alone. + LIVING_SPECIMEN + An occurrence record describing a living specimen, e.g. + MACHINE_OBSERVATION + An occurrence record describing an observation made by a machine. + MATERIAL_SAMPLE + An occurrence record based on samples taken from other specimens or the environment. + OBSERVATION + An occurrence record describing an observation. + PRESERVED_SPECIMEN + An occurrence record describing a preserved specimen. 
+ UNKNOWN + */ + + private static BasisOfRecord matchBasisOfRecord(String value){ + if (value.equals("PRESERVED_SPECIMEN")) return BasisOfRecord.PreservedSpecimen; + else if (value.equals("HUMAN_OBSERVATION")) return BasisOfRecord.HumanObservation; + else if (value.equals("FOSSIL_SPECIMEN")) return BasisOfRecord.FossilSpecimen; + else if (value.equals("MACHINE_OBSERVATION")) return BasisOfRecord.MachineObservation; + else if (value.equals("LIVING_SPECIMEN")) return BasisOfRecord.LivingSpecimen; + else if (value.equals("OBSERVATION")) return BasisOfRecord.Observation; + else if (value.equals("MATERIAL_SAMPLE")) return BasisOfRecord.MaterialSample; + else if (value.equals("LITERATURE")) return BasisOfRecord.Literature; + return BasisOfRecord.Unknown; + } + + + + private OccurrencePoint retrieveElement(Map mappedObj) throws Exception{ + long start = System.currentTimeMillis(); + String occurrenceId = getAsInteger(mappedObj, "key").toString(); + OccurrencePoint occurrence = new OccurrencePoint(occurrenceId); + + occurrence.setCollectionCode(getAsString(mappedObj, "collectionCode")); + occurrence.setInstitutionCode(getAsString(mappedObj, "institutionCode")); + occurrence.setCatalogueNumber(getAsString(mappedObj, "catalogNumber")); + occurrence.setRecordedBy(getAsString(mappedObj, "recordedBy")); + occurrence.setIdentifiedBy(getAsString(mappedObj, "identifiedBy")); + occurrence.setCountry(getAsString(mappedObj, "country")); + occurrence.setLocality(getAsString(mappedObj, "locality")); + + Calendar eventDate = getAsCalendar(mappedObj, "eventDate"); + if (eventDate==null) + eventDate =getAsCalendar(mappedObj,"dateIdentified"); + + occurrence.setEventDate(eventDate); + + occurrence.setDecimalLatitude(getAsDouble(mappedObj, "decimalLatitude")); + occurrence.setDecimalLongitude(getAsDouble(mappedObj, "decimalLongitude")); + + occurrence.setBasisOfRecord(matchBasisOfRecord(getAsString(mappedObj, "basisOfRecord"))); + + occurrence.setMinDepth(getAsDouble(mappedObj, 
"elevation")); + occurrence.setMaxDepth(getAsDouble(mappedObj, "depth")); + + String taxonKey = getAsInteger(mappedObj, "taxonKey").toString(); + ReducedTaxon rt = retrieveParentTaxon(taxonKey); + occurrence.setKingdom(rt.getKingdom()); + occurrence.setFamily(rt.getFamily()); + + QueryByIdentifier taxonQuery = new QueryByIdentifier(baseURL, taxonKey , QueryType.Taxon); + Map taxon = MappingUtils.getObjectMapping(taxonQuery.build()); + occurrence.setScientificNameAuthorship(getAsString(taxon, "authorship")); + occurrence.setScientificName(getAsString(taxon, "scientificName")); + occurrence.setCitation(getAsString(taxon, "accordingTo")); + log.trace("[Benchmark] time to retrieve occurrence is "+(System.currentTimeMillis()-start)); + return occurrence; + } + + + private ReducedTaxon retrieveParentTaxon(String taxonId) throws Exception { + long start = System.currentTimeMillis(); + QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId, QueryType.Taxon); + query.addPath("parents"); + LinkedList> parentsList = MappingUtils.getObjectList(query.build()); + ReducedTaxon taxon = new ReducedTaxon(); + for(HashMap mappedObject : parentsList){ + String rank = getAsString(mappedObject, "rank"); + String value = getAsString(mappedObject, "scientificName"); + if (rank.equalsIgnoreCase("family")) + taxon.setFamily(value); + else if (rank.equalsIgnoreCase("kingdom")) + taxon.setKingdom(value); + if (taxon.isValid()) + return taxon; + } + log.trace("[Benchmark] time to retrieve taxon is "+(System.currentTimeMillis()-start)); + return taxon; + } + + + protected static class ReducedTaxon{ + + private String family = null; + private String kingdom = null; + public String getFamily() { + return family; + } + public void setFamily(String family) { + this.family = family; + } + public String getKingdom() { + return kingdom; + } + public void setKingdom(String kingdom) { + this.kingdom = kingdom; + } + + public boolean isValid(){ + return family!=null && kingdom!=null; + } + } + 
+} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/ProductKey.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/ProductKey.java new file mode 100644 index 0000000..4781c26 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/ProductKey.java @@ -0,0 +1,20 @@ +package org.gcube.data.spd.gbifplugin.search; + +import java.util.List; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +import org.gcube.data.spd.gbifplugin.search.query.QueryCondition; +import org.gcube.data.spd.model.products.DataSet; + +@AllArgsConstructor +@Getter +public class ProductKey { + + private List queryCondition; + private DataSet dataset; + + + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/ResultItemSearch.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/ResultItemSearch.java new file mode 100644 index 0000000..f757ecb --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/ResultItemSearch.java @@ -0,0 +1,159 @@ +package org.gcube.data.spd.gbifplugin.search; + +import static org.gcube.data.spd.gbifplugin.search.query.MappingUtils.getAsString; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.gcube.data.spd.gbifplugin.Constants; +import org.gcube.data.spd.gbifplugin.search.query.MappingUtils; +import org.gcube.data.spd.gbifplugin.search.query.PagedQueryIterator; +import org.gcube.data.spd.gbifplugin.search.query.PagedQueryObject; +import org.gcube.data.spd.gbifplugin.search.query.QueryByIdentifier; +import org.gcube.data.spd.gbifplugin.search.query.QueryCondition; +import org.gcube.data.spd.gbifplugin.search.query.QueryCount; +import org.gcube.data.spd.gbifplugin.search.query.QueryType; +import org.gcube.data.spd.gbifplugin.search.query.ResultType; +import 
org.gcube.data.spd.model.Condition; +import org.gcube.data.spd.model.exceptions.StreamBlockingException; +import org.gcube.data.spd.model.products.DataSet; +import org.gcube.data.spd.model.products.Product; +import org.gcube.data.spd.model.products.Product.ProductType; +import org.gcube.data.spd.model.products.ResultItem; +import org.gcube.data.spd.model.products.Taxon; +import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ResultItemSearch { + + private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); + + private static Logger log = LoggerFactory.getLogger(ResultItemSearch.class); + + private List queryConditions = new ArrayList(); + + private String baseURL; + + String searchQuery; + + public ResultItemSearch(String baseURL, String searchQuery, Condition ... conditions){ + this.baseURL = baseURL; + this.searchQuery = searchQuery.replaceAll(" ", "%20"); + try{ + this.queryConditions = Utils.elaborateConditions(conditions); + }catch(Exception e){ + log.error("error elaborating conditions",e); + } + } + + public void search(ObjectWriter writer, int limit){ + PagedQueryObject queryObject = new PagedQueryObject(baseURL, ResultType.Occurrence,limit); + queryObject.setConditions(QueryCondition.cond("scientificName",searchQuery)); + try{ + PagedQueryIterator pagedIterator = new PagedQueryIterator(queryObject) { + + Set alreadyVisited =new HashSet(); + + @Override + protected ResultItem getObject(Map mappedObject) throws Exception { + return buildResult(mappedObject); + } + + @Override + protected boolean useIt(Map mappedObject) { + String datasetKey = getAsString(mappedObject,"datasetKey"); + Integer taxonId = (Integer)mappedObject.get("taxonKey"); + String key = datasetKey+"|"+taxonId; + if (alreadyVisited.contains(key)) + return false; + alreadyVisited.add(key); + return true; + } + + }; + + while (pagedIterator.hasNext() && writer.isAlive()) + 
writer.write(pagedIterator.next()); + + }catch(Exception e){ + log.error("error writing resultItems",e); + writer.write(new StreamBlockingException(Constants.REPOSITORY_NAME)); + } + + + } + + ResultItem buildResult(Map singleObject) throws Exception{ + long start = System.currentTimeMillis(); + Integer taxonId = (Integer)singleObject.get("taxonKey"); + String scientificName = getAsString(singleObject,"species"); + ResultItem resItem = new ResultItem(taxonId.toString(), scientificName ); + + resItem.setParent(retrieveTaxon(taxonId.toString())); + + resItem.setScientificNameAuthorship(retrieveAuthorship(taxonId.toString())); + + resItem.setRank(getAsString(singleObject, "taxonRank")); + + resItem.setCitation(getAsString(singleObject,"institutionCode")); + + DataSet dataset = DataSetRetreiver.get(getAsString(singleObject,"datasetKey"), baseURL); + resItem.setDataSet(dataset); + + List products = retrieveProducts(taxonId.toString(), dataset); + resItem.setProducts(products); + + String credits = "Biodiversity occurrence data published by: "+dataset.getDataProvider().getName()+" (Accessed through GBIF Data Portal, data.gbif.org, "+format.format(Calendar.getInstance().getTime())+")"; + resItem.setCredits(credits); + log.trace("[Benchmark] time to retrieve ResultItem is "+(System.currentTimeMillis()-start)); + return resItem; + } + + private String retrieveAuthorship(String taxonId) throws Exception { + QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId, QueryType.Taxon); + Map mapping = MappingUtils.getObjectMapping(query.build()); + if (mapping.containsKey("authorship")) + return getAsString(mapping, "authorship"); + else return ""; + } + + private Taxon retrieveTaxon(String taxonId) throws Exception { + long start = System.currentTimeMillis(); + QueryByIdentifier query = new QueryByIdentifier(baseURL, taxonId, QueryType.Taxon); + query.addPath("parents"); + LinkedList> parentsList = MappingUtils.getObjectList(query.build()); + Taxon parentTaxon = null; + 
for(HashMap mappedObject : parentsList){ + Taxon taxon = new Taxon(((Integer)mappedObject.get("key")).toString(), getAsString(mappedObject, "scientificName")); + taxon.setCitation(getAsString(mappedObject, "accordingTo")); + taxon.setRank(getAsString(mappedObject, "rank")); + if (parentTaxon!=null) + taxon.setParent(parentTaxon); + parentTaxon = taxon; + } + log.trace("[Benchmark] time to retrieve taxon is "+(System.currentTimeMillis()-start)); + return parentTaxon; + } + + private List retrieveProducts( String taxonId, DataSet dataset, Condition ... properties) throws Exception{ + long start = System.currentTimeMillis(); + QueryCount occurrencesQuery = new QueryCount(baseURL, ResultType.Occurrence); + occurrencesQuery.setConditions(QueryCondition.cond("taxonKey",taxonId), QueryCondition.cond("datasetKey", dataset.getId()), QueryCondition.cond("hasCoordinate","true")); + String productId = Utils.createProductsKey(Utils.getDataSetAsString(dataset), taxonId, this.queryConditions); + Product product = new Product(ProductType.Occurrence, productId); + product.setCount(occurrencesQuery.getCount()); + log.trace("[Benchmark] time to retrieve product is "+(System.currentTimeMillis()-start)); + return Arrays.asList(product); + } + + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/Utils.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/Utils.java new file mode 100644 index 0000000..c82c4b8 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/Utils.java @@ -0,0 +1,136 @@ +package org.gcube.data.spd.gbifplugin.search; + +import static org.gcube.data.spd.gbifplugin.search.query.QueryCondition.cond; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.gcube.data.spd.gbifplugin.search.query.QueryCondition; +import org.gcube.data.spd.model.Condition; +import org.gcube.data.spd.model.Coordinate; +import 
org.gcube.data.spd.model.products.DataProvider; +import org.gcube.data.spd.model.products.DataSet; + +public class Utils { + + protected static List elaborateConditions(Condition[] properties) throws Exception{ + List queryConditions = new ArrayList(); + for (Condition prop: properties){ + switch (prop.getType()) { + case COORDINATE: + Coordinate coord = (Coordinate)prop.getValue(); + queryConditions.addAll(getCoordinateConditions(coord, prop)); + break; + case DATE: + Calendar date = (Calendar) prop.getValue(); + queryConditions.addAll(getDateCondition(date, prop)); + break; + } + } + return queryConditions; + } + + public static ProductKey elaborateProductsKey(String id) { + List queryConditions = new ArrayList(); + String[] splitString = id.split("\\|\\|"); + + DataSet dataset = getDataSetFromString(splitString[0]); + queryConditions.add(cond("datasetKey", dataset.getId())); + queryConditions.add(cond("taxonKey", splitString[1])); + if (splitString.length>2) + for (int i = 2; i<=splitString.length; i++){ + String[] equalSplit = splitString[i].split("="); + queryConditions.add(cond(equalSplit[0], equalSplit[1])); + } + return new ProductKey(queryConditions, dataset); + } + + protected static String createProductsKey(String dataResourceKey, String taxonKey, List queryConditions) { + StringBuilder conditionTransformer = new StringBuilder(); + for (QueryCondition cond : queryConditions) + conditionTransformer.append("||").append(cond.getKey()).append("=").append(cond.getValue()); + return dataResourceKey+"||"+taxonKey+conditionTransformer.toString(); + } + + public static List getCoordinateConditions(Coordinate coordinate, Condition prop){ + List conditions = new ArrayList(); + switch (prop.getOp()) { + case EQ: + conditions.add(cond("decimalLatitiude",coordinate.getLatitude()+"")); + conditions.add(cond("decimalLongitude",coordinate.getLongitude()+"")); + break; + case GT: + conditions.add(cond("decimalLatitiude",(coordinate.getLatitude()+0.01)+",90")); + 
conditions.add(cond("decimalLongitude",(coordinate.getLongitude()+0.01)+",180")); + break; + case GE: + conditions.add(cond("decimalLatitiude",coordinate.getLatitude()+",90")); + conditions.add(cond("decimalLongitude",coordinate.getLongitude()+",180")); + break; + case LT: + conditions.add(cond("decimalLatitiude","-90,"+(coordinate.getLatitude()+0.01))); + conditions.add(cond("decimalLongitude","-180,"+(coordinate.getLongitude()+0.01))); + break; + case LE: + conditions.add(cond("decimalLatitiude","-90,"+coordinate.getLatitude())); + conditions.add(cond("decimalLongitude","-180,"+coordinate.getLongitude())); + break; + default: + break; + } + return conditions; + } + + public static List getDateCondition(Calendar date, Condition prop){ + List conditions = new ArrayList(); + DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + Calendar newDate = date; + Calendar now = Calendar.getInstance(); + switch (prop.getOp()) { + case EQ: + conditions.add(cond("eventDate",dateFormat.format(date.getTime()))); + break; + case GT: + newDate.add(Calendar.DAY_OF_MONTH, -1); + conditions.add(cond("eventDate",dateFormat.format(date.getTime())+","+dateFormat.format(now.getTime()))); + break; + case GE: + conditions.add(cond("eventDate",dateFormat.format(date.getTime())+","+dateFormat.format(now.getTime()))); + break; + case LT: + newDate.add(Calendar.DAY_OF_MONTH, 1); + conditions.add(cond("eventDate","1000-01-01,"+dateFormat.format(now.getTime()))); + break; + case LE: + conditions.add(cond("eventDate","1000-01-01,"+dateFormat.format(now.getTime()))); + break; + default: + break; + } + return conditions; + } + + protected static String getDataSetAsString(DataSet dataset){ + StringBuilder datasetAsString = new StringBuilder( + dataset.getId()) + .append("^^").append(dataset.getName()) + .append("^^").append(dataset.getCitation()) + .append("^^").append(dataset.getDataProvider().getId()) + .append("^^").append(dataset.getDataProvider().getName()); + return 
datasetAsString.toString(); + } + + protected static DataSet getDataSetFromString(String datasetString){ + String[] splittedDataset = datasetString.split("\\^\\^"); + DataSet dataset = new DataSet(splittedDataset[0]); + dataset.setName(splittedDataset[1]); + dataset.setCitation(splittedDataset[2]); + DataProvider dataProvider = new DataProvider(splittedDataset[3]); + dataProvider.setName(splittedDataset[4]); + dataset.setDataProvider(dataProvider); + return dataset; + } +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/MappingUtils.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/MappingUtils.java new file mode 100644 index 0000000..bd3cdc9 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/MappingUtils.java @@ -0,0 +1,85 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import java.io.StringReader; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Locale; +import java.util.Map; + +import javax.ws.rs.core.MediaType; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.sun.jersey.api.client.Client; +import com.sun.jersey.api.client.WebResource; +import com.sun.jersey.api.client.config.ClientConfig; +import com.sun.jersey.api.client.config.DefaultClientConfig; + +public class MappingUtils { + + private static Logger log = LoggerFactory.getLogger(MappingUtils.class); + + @SuppressWarnings("unchecked") + public static Map getObjectMapping(String query) throws Exception{ + ClientConfig clientConfig = new DefaultClientConfig(); + Client client = Client.create(clientConfig); + WebResource target = client.resource(query); + //NameUsageWsClient nuws = new NameUsageWsClient(target); + String response = 
target.type(MediaType.APPLICATION_JSON).acceptLanguage(Locale.ENGLISH).get(String.class); + ObjectMapper mapper = new ObjectMapper(); // can reuse, share globally + return mapper.readValue(new StringReader(response), Map.class); + + } + + public static LinkedList> getObjectList(String query) throws Exception{ + ClientConfig clientConfig = new DefaultClientConfig(); + Client client = Client.create(clientConfig); + WebResource target = client.resource(query); + //NameUsageWsClient nuws = new NameUsageWsClient(target); + String response = target.type(MediaType.APPLICATION_JSON).acceptLanguage(Locale.ENGLISH).get(String.class); + ObjectMapper mapper = new ObjectMapper(); // can reuse, share globally + return mapper.readValue(new StringReader(response), new TypeReference>>() { + }); + + } + + public static String getAsString(Map map, String key){ + if (!map.containsKey(key)) return null; + return (String) map.get(key); + } + + public static Double getAsDouble(Map map, String key){ + if (!map.containsKey(key)) return 0d; + return (Double) map.get(key); + } + + public static Integer getAsInteger(Map map, String key){ + if (!map.containsKey(key)) return 0; + return (Integer) map.get(key); + } + + public static Calendar getAsCalendar(Map map, String key){ + if (!map.containsKey(key)) return null; + return parseCalendar((String) map.get(key)); + } + + + public static Calendar parseCalendar(String date){ + try{ + DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ"); + Calendar calendar= Calendar.getInstance(); + calendar.setTime(df.parse(date)); + return calendar; + }catch (ParseException e) { + log.warn("DateModified discarded ("+date+")"); + return null; + } + } +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryIterator.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryIterator.java new file mode 100644 index 0000000..d37eab0 --- /dev/null +++ 
b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryIterator.java @@ -0,0 +1,85 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import java.util.Iterator; +import java.util.Map; + +import lombok.NonNull; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class PagedQueryIterator implements Iterator{ + + private static Logger log = LoggerFactory.getLogger(PagedQueryIterator.class); + + private @NonNull PagedQueryObject pagedQuery; + + public PagedQueryIterator(@NonNull PagedQueryObject pagedQuery) { + this.pagedQuery = pagedQuery; + } + + protected abstract T getObject(Map mappedObject) throws Exception; + + Map mapping; + + Iterator> resultIterator; + + Map actualObject= null; + + Long start = null; + Long parsingStart = null; + + @SuppressWarnings("unchecked") + @Override + public boolean hasNext() { + try{ + if (resultIterator==null){ + String query = pagedQuery.buildNext(); + start = System.currentTimeMillis(); + mapping = MappingUtils.getObjectMapping(query); + parsingStart = System.currentTimeMillis(); + log.trace("[Benchmark] got Elements with query "+query+" and took "+(parsingStart-start)); + resultIterator = ((Iterable>) mapping.get("results")).iterator(); + } + + if (!resultIterator.hasNext()){ + log.trace("[Benchmark] page retrieved and parsed in "+(System.currentTimeMillis()-start)); + if ((Boolean)mapping.get("endOfRecords")){ + log.trace("is end of records, no next element"); + return false; + } + resultIterator = null; + + } else{ + actualObject = resultIterator.next(); + if (useIt(actualObject)) + return true; + } + return this.hasNext(); + + }catch(Exception e){ + log.error("error computing hasNext",e); + throw new RuntimeException(e); + } + } + + @Override + public T next() { + try{ + return getObject(actualObject); + }catch(Exception e){ + log.error("error computing next",e); + throw new RuntimeException(e); + } + } + + protected boolean useIt(Map mappedObject){ + return true; + } + + 
@Override + public void remove() { + resultIterator = null; + } + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryObject.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryObject.java new file mode 100644 index 0000000..7a2e01f --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/PagedQueryObject.java @@ -0,0 +1,47 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import lombok.Getter; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; + + +@RequiredArgsConstructor +public class PagedQueryObject { + + private @NonNull String baseUri; + + @Getter + List conditions = new ArrayList(); + + private @NonNull ResultType resultType; + + private @NonNull Integer resultPerQuery; + + private int offset = 0; + + public void setConditions(QueryCondition ... conditions){ + this.conditions = Arrays.asList(conditions); + } + + + + public String buildNext(){ + StringBuilder query = new StringBuilder(baseUri); + if (!baseUri.endsWith("/")) query.append("/"); + query.append(this.resultType.getQueryEntry()).append("/"); + query.append("search/?limit=").append(resultPerQuery); + query.append("&offset=").append(offset); + + if (conditions.size()>0) + for (QueryCondition queryCond: conditions) + query.append("&").append(queryCond.getKey()).append("=").append(queryCond.getValue()); + offset = offset+resultPerQuery; + return query.toString(); + } + + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryByIdentifier.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryByIdentifier.java new file mode 100644 index 0000000..e9306b8 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryByIdentifier.java @@ -0,0 +1,35 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import java.util.ArrayList; +import java.util.List; + +import 
lombok.NonNull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class QueryByIdentifier { + + private @NonNull String baseUri; + + private @NonNull String key; + + private @NonNull QueryType type; + + private List paths = new ArrayList(); + + public void addPath(String path){ + paths.add(path); + } + + public String build(){ + StringBuilder query = new StringBuilder(baseUri); + if (!baseUri.endsWith("/")) query.append("/"); + query.append(this.type.getQueryEntry()).append("/"); + query.append(key); + for (String path : paths) + query.append("/").append(path); + return query.toString(); + } + + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCondition.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCondition.java new file mode 100644 index 0000000..911149a --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCondition.java @@ -0,0 +1,20 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import lombok.Getter; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; + + +@RequiredArgsConstructor +public class QueryCondition{ + + public static QueryCondition cond(String key, String value){ + return new QueryCondition(key, value); + } + + @Getter + private @NonNull String key; + @Getter + private @NonNull String value; + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCount.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCount.java new file mode 100644 index 0000000..4aaad82 --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryCount.java @@ -0,0 +1,54 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import lombok.NonNull; +import lombok.RequiredArgsConstructor; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@RequiredArgsConstructor +public 
class QueryCount { + + private static Logger log = LoggerFactory.getLogger(QueryCount.class); + + private @NonNull String baseUri; + + List conditions = new ArrayList(); + + private @NonNull ResultType resultType; + + public void setConditions(QueryCondition ... conditions){ + this.conditions = Arrays.asList(conditions); + } + + public int getCount(){ + Map mapping; + try { + mapping = MappingUtils.getObjectMapping(this.build()); + if (mapping.get("count")==null) return 0; + return (Integer)mapping.get("count"); + } catch (Exception e) { + log.error("error computing count, returning 0",e); + return 0; + } + + } + + + private String build(){ + StringBuilder query = new StringBuilder(baseUri); + if (!baseUri.endsWith("/")) query.append("/"); + query.append(this.resultType.getQueryEntry()).append("/"); + query.append("search/?limit=0"); + + if (conditions.size()>0) + for (QueryCondition queryCond: conditions) + query.append("&").append(queryCond.getKey().replaceAll(" ", "%20")).append("=").append(queryCond.getValue().replaceAll(" ", "%20")); + return query.toString(); + } +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryType.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryType.java new file mode 100644 index 0000000..904857c --- /dev/null +++ b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/QueryType.java @@ -0,0 +1,17 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NonNull; + + +public @AllArgsConstructor enum QueryType{ + Dataset("dataset"), + Occurrence("occurrence"), + Taxon("species"), + Organization("organization"); + + @Getter + private @NonNull String queryEntry; + +} diff --git a/src/main/java/org/gcube/data/spd/gbifplugin/search/query/ResultType.java b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/ResultType.java new file mode 100644 index 0000000..c3b6e69 --- /dev/null +++ 
b/src/main/java/org/gcube/data/spd/gbifplugin/search/query/ResultType.java @@ -0,0 +1,15 @@ +package org.gcube.data.spd.gbifplugin.search.query; + +import lombok.Getter; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public enum ResultType{ + Occurrence("occurrence"), + Taxon("species"); + + @Getter + private @NonNull String queryEntry; + +} \ No newline at end of file diff --git a/src/main/resources/META-INF/services/org.gcube.data.spd.plugin.fwk.AbstractPlugin b/src/main/resources/META-INF/services/org.gcube.data.spd.plugin.fwk.AbstractPlugin new file mode 100644 index 0000000..fbcfda3 --- /dev/null +++ b/src/main/resources/META-INF/services/org.gcube.data.spd.plugin.fwk.AbstractPlugin @@ -0,0 +1 @@ +org.gcube.data.spd.gbifplugin.GBIFPlugin \ No newline at end of file diff --git a/src/test/java/org/gcube/data/spd/gbif/QueryTest.java b/src/test/java/org/gcube/data/spd/gbif/QueryTest.java new file mode 100644 index 0000000..617f6f6 --- /dev/null +++ b/src/test/java/org/gcube/data/spd/gbif/QueryTest.java @@ -0,0 +1,139 @@ +package org.gcube.data.spd.gbif; + +import java.io.IOException; +import java.io.StringReader; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; + +import javax.ws.rs.core.MediaType; + +import org.gcube.data.spd.gbifplugin.search.OccurrenceSearch; +import org.gcube.data.spd.gbifplugin.search.ResultItemSearch; +import org.gcube.data.spd.gbifplugin.search.Utils; +import org.gcube.data.spd.gbifplugin.search.query.MappingUtils; +import org.gcube.data.spd.gbifplugin.search.query.PagedQueryIterator; +import org.gcube.data.spd.gbifplugin.search.query.PagedQueryObject; +import org.gcube.data.spd.gbifplugin.search.query.QueryCondition; +import org.gcube.data.spd.gbifplugin.search.query.ResultType; +import org.gcube.data.spd.model.exceptions.StreamException; 
+import org.gcube.data.spd.model.products.OccurrencePoint; +import org.gcube.data.spd.model.products.ResultItem; +import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter; +import org.junit.Test; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.sun.jersey.api.client.Client; +import com.sun.jersey.api.client.WebResource; +import com.sun.jersey.api.client.config.ClientConfig; +import com.sun.jersey.api.client.config.DefaultClientConfig; + + +public class QueryTest { + + public static final String BASE_URL = "http://api.gbif.org/v1"; + + @Test + public void query4Occurrence(){ + PagedQueryObject qo = new PagedQueryObject("http://api.gbif.org/v1/", ResultType.Occurrence, 50); + qo.setConditions(QueryCondition.cond("scientificName","Palinurus%20elephas"), QueryCondition.cond("hasCoordinate","true")); + PagedQueryIterator pagedQuery = new PagedQueryIterator(qo) { + + @Override + protected String getObject(Map mappedObject) + throws Exception { + System.out.println(mappedObject.toString()); + return mappedObject.toString(); + } + }; + + while (pagedQuery.hasNext()) + System.out.println(pagedQuery.next()); + + } + + @Test + public void query4Taxon() throws JsonParseException, JsonMappingException, IOException{ + ClientConfig clientConfig = new DefaultClientConfig(); + Client client = Client.create(clientConfig); + + WebResource target = client.resource("http://api.gbif.org/v1/species/search/?limit=1&offset=0&q=sarda%20sarda"); + + //NameUsageWsClient nuws = new NameUsageWsClient(target); + String response = target.type(MediaType.APPLICATION_JSON).acceptLanguage(Locale.ENGLISH).get(String.class); + ObjectMapper mapper = new ObjectMapper(); // can reuse, share globally + Map userData = mapper.readValue(new StringReader(response), Map.class); + for (Entry entry : userData.entrySet()) + System.out.println(String.format("entry name %s class 
value %s", entry.getKey(), entry.getValue().getClass().getSimpleName())); + + } + + @Test + public void searchItems() throws Exception{ + + ResultItemSearch searcher = new ResultItemSearch(BASE_URL,"Limanda limanda"); + + searcher.search(new ObjectWriter() { + + int i =0; + + @Override + public boolean write(StreamException error) { + error.printStackTrace(); + return false; + } + + @Override + public boolean write(ResultItem t) { + System.out.println("written element "+(++i)); + return true; + } + + @Override + public boolean isAlive() { + return true; + } + }, 50); + } + + @Test + public void searchOccurrences() throws Exception{ + + OccurrenceSearch searcher = new OccurrenceSearch(BASE_URL); + + searcher.search(new ObjectWriter() { + + int i =0; + + @Override + public boolean write(StreamException error) { + error.printStackTrace(); + return false; + } + + @Override + public boolean write(OccurrencePoint t) { + System.out.println("written element "+(++i)); + return true; + } + + @Override + public boolean isAlive() { + return true; + } + }, "Limanda limanda", 50); + } + + @Test + public void dataTest() throws Exception{ + Utils.elaborateProductsKey("197908d0-5565-11d8-b290-b8a03c50a862^^Fishbase^^FishBase: Fishbase^^192a9ab0-5565-11d8-b290-b8a03c50a862^^FishBase||5208593"); + + } + +} diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties new file mode 100644 index 0000000..11c7491 --- /dev/null +++ b/src/test/resources/log4j.properties @@ -0,0 +1,10 @@ +log4j.appender.ROOT=org.apache.log4j.ConsoleAppender +log4j.appender.ROOT.layout=org.apache.log4j.PatternLayout +log4j.appender.ROOT.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %c{2} [%t,%M:%L] %m%n +log4j.rootLogger=INFO,ROOT + +log4j.appender.SPD-CL=org.apache.log4j.ConsoleAppender +log4j.appender.SPD-CL.layout=org.apache.log4j.PatternLayout +log4j.appender.SPD-CL.layout.ConversionPattern=[SPD-CL] %d{HH:mm:ss,SSS} %-5p %c{2} [%t,%M:%L] %m%n + 
+log4j.category.org.gcube.data.spd=TRACE,SPD-CL \ No newline at end of file