catalogue-of-life-spd-plugin/src/main/java/org/gcube/data/spd/catalogueoflife/capabilities/ClassificationCapabilityImp...

470 lines
14 KiB
Java

package org.gcube.data.spd.catalogueoflife.capabilities;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import org.gcube.data.spd.catalogueoflife.CatalogueOfLifePlugin;
import org.gcube.data.spd.catalogueoflife.Utils;
import org.gcube.data.spd.catalogueoflife.parsers.ChildrenParser;
import org.gcube.data.spd.catalogueoflife.parsers.ResultParser;
import org.gcube.data.spd.catalogueoflife.parsers.TaxonParser;
import org.gcube.data.spd.model.Condition;
import org.gcube.data.spd.model.Conditions;
import org.gcube.data.spd.model.exceptions.ExternalRepositoryException;
import org.gcube.data.spd.model.exceptions.IdNotValidException;
import org.gcube.data.spd.model.exceptions.MethodNotSupportedException;
import org.gcube.data.spd.model.exceptions.StreamException;
import org.gcube.data.spd.model.exceptions.StreamNonBlockingException;
import org.gcube.data.spd.model.products.TaxonomyItem;
import org.gcube.data.spd.plugin.fwk.capabilities.ClassificationCapability;
import org.gcube.data.spd.plugin.fwk.capabilities.UnfoldCapability;
import org.gcube.data.spd.plugin.fwk.writers.ClosableWriter;
import org.gcube.data.spd.plugin.fwk.writers.ObjectWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ClassificationCapabilityImpl extends ClassificationCapability implements UnfoldCapability {
Logger logger = LoggerFactory.getLogger(ClassificationCapabilityImpl.class);
public Set<Conditions> getSupportedProperties() {
return Collections.emptySet();
}
/*
*
*/
@Override
public void getSynonymnsById(ObjectWriter<TaxonomyItem> writer, String id)
throws IdNotValidException, MethodNotSupportedException, ExternalRepositoryException {
String pathUrl = CatalogueOfLifePlugin.baseurl + "?id=" + id + "&response=full";
InputStream is = null;
XMLInputFactory ifactory;
XMLEventReader eventReader = null;
try{
is =URI.create(pathUrl).toURL().openStream();
ifactory = XMLInputFactory.newInstance();
eventReader = ifactory.createXMLEventReader(is);
while (eventReader.hasNext()){
XMLEvent event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "synonym")){
event = eventReader.nextEvent();
// logger.trace("result");
if (event.isCharacters())
getSynonym(eventReader, writer, new TaxonParser(true));
continue;
}
else if (Utils.checkEndElement(event, "result")){
break;
}
}
}catch (XMLStreamException e) {
logger.error("error retrieving synonyms");
throw new ExternalRepositoryException();
} catch (Exception e) {
logger.error("error",e);
}finally{
try {
if (eventReader != null)
eventReader.close();
if (is != null)
is.close();
} catch (Exception e) {
logger.error("error closing streams",e);
}
}
}
@Override
public TaxonomyItem retrieveTaxonById(String id) {
try {
return retrieveById(id, new TaxonParser(true));
} catch (Exception e) {
logger.error("error retreiving taxon item with id "+id);
return null;
}
}
private List<String> getChildren(XMLEventReader eventReader) {
Set<String> setIds = new HashSet<String>();
while (eventReader.hasNext()){
XMLEvent event;
try {
event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "id")){
event = eventReader.nextEvent();
String tempId = event.asCharacters().getData();
logger.trace("found id "+ tempId);
setIds.add(tempId);
}else if (Utils.checkEndElement(event, "child_taxa")){
logger.trace("found child taxa closed");
break;
}
} catch (XMLStreamException e) {
logger.error("XMLStreamException",e);
}
}
return Collections.list(Collections.enumeration(setIds));
}
protected void checkCommonName(XMLEventReader eventReader,
ObjectWriter<TaxonomyItem> writer, ResultParser<TaxonomyItem> parser) throws XMLStreamException {
try{
while (eventReader.hasNext()){
XMLEvent event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "name_status")){
event = eventReader.nextEvent();
if ((event.asCharacters().getData()).equals("common name")){
continue;
}
else
break;
}
else if (Utils.checkStartElement(event, "accepted_name")){
if (writer.isAlive())
writer.write(parser.parse(eventReader));
return;
}
}
}catch (Exception e) {
logger.error("Exception",e);
}
}
public <T> void retrieveByScientificName(ObjectWriter<T> writer, ResultParser<T> parser, String name, int start, Set<String> hash) {
String pathUrl = CatalogueOfLifePlugin.baseurl + "?name=" + name.replaceAll(" ", "+") + "&response=full&start=" + start;
logger.trace("PATH " + pathUrl);
int total_number_of_results = 0;
int number_of_results_returned = 0;
InputStream is = null;
XMLInputFactory ifactory;
XMLEventReader eventReader = null;
try{
is =URI.create(pathUrl).toURL().openStream();
ifactory = XMLInputFactory.newInstance();
eventReader = ifactory.createXMLEventReader(is);
while(eventReader.hasNext()){
XMLEvent event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "results")){
StartElement element = event.asStartElement();
@SuppressWarnings("unchecked")
Iterator<Attribute> attributes = element.getAttributes();
while(attributes.hasNext()){
Attribute attribute = attributes.next();
if(attribute.getName().toString().equals("total_number_of_results")){
// logger.trace(attribute.getValue());
total_number_of_results = (Integer.parseInt(attribute.getValue()));
}
else if (attribute.getName().toString().equals("number_of_results_returned")){
// logger.trace(attribute.getValue());
number_of_results_returned = (Integer.parseInt(attribute.getValue()));
}
}
continue;
}else if (Utils.checkStartElement(event, "result")){
if (writer.isAlive()){
writer.write(parser.parse(eventReader));
}
else return;
continue;
}
else if (Utils.checkEndElement(event, "results")){
if (total_number_of_results > number_of_results_returned + start)
retrieveByScientificName(writer, parser , name, start+50, hash);
else
break;
}
}
} catch (MalformedURLException e) {
logger.error("MalformedURLException", e);
} catch (IOException e) {
logger.error("IOException", e);
} catch (XMLStreamException e) {
logger.error("XMLStreamException", e);
} catch (NoSuchElementException e) {
logger.error("NoSuchElementException", e);
}finally{
try {
if (eventReader != null)
eventReader.close();
if (is != null)
is.close();
} catch (XMLStreamException e) {
logger.error("XMLStreamException",e);
} catch (IOException e) {
logger.error("IOException",e);
}
}
}
@Override
public void searchByScientificName(String word,
ObjectWriter<TaxonomyItem> writer, Condition... properties) {
try{
logger.trace("Retrive taxa by scientific name " + word);
retrieveByScientificName(writer, new TaxonParser(true), word+"*", 0, new HashSet<String>());
} catch (Exception e) {
logger.error("Error searching for scientificName",e);
}
}
@Override
public void retrieveTaxonByIds(Iterator<String> ids,
ClosableWriter<TaxonomyItem> writer) {
InputStream is = null;
XMLInputFactory ifactory;
XMLEventReader eventReader = null;
TaxonParser parser = new TaxonParser(true);
while(ids.hasNext()) {
String id = ids.next();
try{
logger.trace("Retrive taxa by id " + id);
String pathUrl = CatalogueOfLifePlugin.baseurl + "?id=" + id + "&response=full";
//System.out.println(pathUrl);
is =URI.create(pathUrl).toURL().openStream();
ifactory = XMLInputFactory.newInstance();
eventReader = ifactory.createXMLEventReader(is);
while (eventReader.hasNext()){
XMLEvent event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "result")){
if (writer.isAlive())
writer.write(parser.parse(eventReader));
continue;
}
else if (Utils.checkEndElement(event, "results")){
break;
}
}
} catch (Exception e) {
logger.error("error reading id "+id,e);
writer.write(new StreamNonBlockingException("CatalogueOfLife",id));
}
}
writer.close();
try {
if (eventReader != null)
eventReader.close();
if (is != null)
is.close();
} catch (XMLStreamException e) {
logger.error("XMLStreamException",e);
} catch (IOException e) {
logger.error("IOException",e);
}
}
private <T> T retrieveById(String id, ResultParser<T> parser) throws XMLStreamException, MalformedURLException, IOException{
String pathUrl = CatalogueOfLifePlugin.baseurl + "?id=" + id + "&response=full";
InputStream is = null;
XMLInputFactory ifactory;
XMLEventReader eventReader = null;
try{
is =URI.create(pathUrl).toURL().openStream();
ifactory = XMLInputFactory.newInstance();
eventReader = ifactory.createXMLEventReader(is);
while (eventReader.hasNext()){
XMLEvent event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "result"))
return parser.parse(eventReader);
}
throw new XMLStreamException("result tag not found");
}finally{
try{
if(eventReader!=null)
eventReader.close();
if (is!=null)
is.close();
}catch (Exception e) {
logger.error("error closing reader",e);
}
}
}
private void getSynonym(XMLEventReader eventReader, ObjectWriter<TaxonomyItem> writer, ResultParser<TaxonomyItem> parser) throws XMLStreamException {
while (eventReader.hasNext()){
XMLEvent event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "id")){
event = eventReader.nextEvent();
if (event.isCharacters()){
// logger.trace(event.asCharacters().getData());
if (writer.isAlive()){
String id = event.asCharacters().getData();
try {
writer.write(retrieveById(id, parser));
} catch (Exception e) {
writer.write(new StreamNonBlockingException("CatalogueOfLife",id));
}
}else
break;
}
}
if (Utils.checkEndElement(event, "synonyms")){
break;
}
}
}
@Override
public List<TaxonomyItem> retrieveTaxonChildrenByTaxonId(String id)
throws IdNotValidException, ExternalRepositoryException {
logger.trace("retrieveTaxonChildrenByTaxonId " + id);
String pathUrl = CatalogueOfLifePlugin.baseurl + "?id=" + id + "&response=full";
logger.trace(pathUrl);
List<TaxonomyItem> list = new ArrayList<TaxonomyItem>();
InputStream is = null;
XMLInputFactory ifactory;
XMLEventReader eventReader = null;
TaxonParser parser = new TaxonParser(false);
try{
is =URI.create(pathUrl).toURL().openStream();
ifactory = XMLInputFactory.newInstance();
eventReader = ifactory.createXMLEventReader(is);
while (eventReader.hasNext()){
XMLEvent event = eventReader.nextEvent();
if (Utils.checkStartElement(event, "child_taxa")){
List<String> listIds= getChildren(eventReader);
// logger.trace("ids found "+listIds);
for (String childrenId: listIds )
list.add(retrieveById(childrenId, parser));
break;
}
else if (Utils.checkEndElement(event, "results")){
break;
}
}
} catch (NoSuchElementException e) {
logger.error("element not found",e);
throw new IdNotValidException();
}catch (Exception e) {
logger.error("repository exception",e);
throw new ExternalRepositoryException(e);
} finally{
try {
if (eventReader != null)
eventReader.close();
if (is != null)
is.close();
} catch (XMLStreamException e) {
logger.error("XMLStreamException",e);
} catch (IOException e) {
logger.error("IOException",e);
}
}
return list;
}
@Override
public void unfold(final ObjectWriter<String> writer, String scientificName) {
ObjectWriter<SpeciesIdentifier> internalWriter = new ObjectWriter<SpeciesIdentifier>() {
@Override
public boolean write(SpeciesIdentifier t) {
ChildrenParser parser = new ChildrenParser();
for (String childId: t.getChildren())
writeAllChildrenName(writer, childId, parser);
return true;
}
@Override
public boolean write(StreamException error) {
return writer.write(error);
}
@Override
public boolean isAlive() {
return writer.isAlive();
}
};
retrieveByScientificName(internalWriter, new ChildrenParser(), scientificName, 0, new HashSet<String>());
}
private void writeAllChildrenName(ObjectWriter<String> writer, String id, ChildrenParser parser){
final int MAX_RETRIES =10;
final long WAIT_TIME = 10000;
try{
SpeciesIdentifier species;
int retries = 0;
boolean exit = false;
do{
try {
species = retrieveById(id, parser);
writer.write(species.getName());
for (String childId: species.getChildren())
writeAllChildrenName(writer, childId, parser);
exit = true;
} catch (IOException e) {
retries++;
try {
Thread.sleep(WAIT_TIME);
} catch (InterruptedException e1) {}
}
}while (retries<MAX_RETRIES && !exit);
if (retries==MAX_RETRIES) throw new Exception("too many retries");
}catch (Exception e) {
writer.write(new StreamNonBlockingException("catalogueOfLife",id));
}
}
}