deleted taxa matching and SPD algorithms

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngineExternalAlgorithms@131257 82a268e6-3cf1-43bd-a215-b396298e98cf
Gianpaolo Coro 2016-09-08 16:52:40 +00:00
parent 92d5097c7d
commit c058ce2602
9 changed files with 0 additions and 2254 deletions

View File

@@ -1,481 +0,0 @@
package org.gcube.dataanalysis.JobSMspd;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.data.spd.model.CommonName;
import org.gcube.data.spd.model.products.TaxonomyItem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MapDwCA {
static Logger logger = LoggerFactory.getLogger(MapDwCA.class);
private BufferedWriter vernacularFile;
private File tempFolder;
private List<File> fileList = new ArrayList<File>();
private String archiveZip = "archive-tax.zip";
private String directory;
public MapDwCA(String directory) {
super();
this.directory=directory;
}
public synchronized File createDwCA(Iterator<TaxonomyItem> taxa) throws Exception{
createMetaXml();
createMetadata();
createHeaders();
createTaxaTxt(taxa);
getAllFiles(tempFolder);
return writeZipFile(tempFolder);
}
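/*
 * A note on the output (grounded in the methods below): the returned
 * archive-tax.zip is a Darwin Core Archive holding meta.xml (the descriptor
 * copied from a bundled resource), eml.xml (metadata with the publication
 * date filled in), taxa.txt and VernacularName.txt.
 */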
/**
* Create file meta.xml
*/
private void createMetaXml(){
try {
BufferedWriter bw = null;
BufferedReader br = null;
//tempFolder = File.createTempFile("DwCA-folder", "" );
tempFolder = new File(directory+"DwCA-folder");
//tempFolder.delete();
tempFolder.mkdir();
File output = new File(tempFolder + "/meta.xml") ;
bw = new BufferedWriter(new FileWriter(output));
br = new BufferedReader(new InputStreamReader(MapDwCA.class.getResourceAsStream("/org/gcube/data/spd/dwca/meta.xml")));
String line;
while ((line = br.readLine()) != null) {
bw.write(line);
bw.write('\n');
}
bw.close();
br.close();
} catch (IOException e) {
logger.error("IO Error", e);
}
}
/**
* Create headers in taxa.txt and vernacular.txt
*/
private void createHeaders(){
try {
BufferedWriter file = new BufferedWriter(new FileWriter(tempFolder + "/" + "taxa.txt", true));
vernacularFile = new BufferedWriter(new FileWriter(tempFolder + "/" + "VernacularName.txt", true));
//header
file.write("taxonID\t");
file.write("acceptedNameUsageID\t");
file.write("parentNameUsageID\t");
file.write("scientificName\t");
file.write("scientificNameAuthorship\t");
file.write("nameAccordingTo\t");
file.write("kingdom\t");
file.write("phylum\t");
file.write("class\t");
file.write("order\t");
file.write("family\t");
file.write("genus\t");
file.write("subgenus\t");
file.write("specificEpithet\t");
file.write("infraspecificEpithet\t");
file.write("verbatimTaxonRank\t");
file.write("taxonRank\t");
file.write("taxonomicStatus\t");
file.write("modified\t");
file.write("bibliographicCitation\t");
file.write("taxonRemarks\t");
file.write("scientificNameID\n");
file.close();
//header VernacularName.txt
vernacularFile.write("taxonID\t");
vernacularFile.write("vernacularName\t");
vernacularFile.write("language\t");
vernacularFile.write("locality\n");
vernacularFile.close();
} catch (IOException e) {
logger.error("IO Error", e);
}
}
/**
* Write taxa.txt
*/
public void createTaxaTxt(Iterator<TaxonomyItem> taxaReader){
while (taxaReader.hasNext()) {
TaxonomyItem item = taxaReader.next();
//logger.trace(item.toString());
writeLine(item);
}
}
private void internalWriter(TaxonomyItem taxonomyItem, BufferedWriter file ) throws IOException{
String[] name = taxonomyItem.getScientificName().split(" ");
// Get the parent element
TaxonomyItem tax = taxonomyItem.getParent();
Hashtable<String, String> hashTaxa = new Hashtable<String,String>();
//create hashtable with taxonomy keys
if (tax !=null)
getTax(tax, hashTaxa);
//taxonID
file.write(taxonomyItem.getId());
file.write("\t");
//acceptedNameUsageID
if (taxonomyItem.getStatus() == null) {
logger.trace("the status is null for " + taxonomyItem.getId());
} else if (taxonomyItem.getStatus().getRefId() != null) {
String id = taxonomyItem.getStatus().getRefId();
file.write(id);
}
file.write("\t");
//parentNameUsageID
if (tax !=null)
file.write(tax.getId());
file.write("\t");
//scientificName
/*if (taxonomyItem.getCitation() != null)
file.write(taxonomyItem.getScientificName() + " " + taxonomyItem.getCitation());
else*/
file.write(taxonomyItem.getScientificName());
file.write("\t");
//scientificNameAuthorship
if (taxonomyItem.getScientificNameAuthorship()!= null)
file.write(taxonomyItem.getScientificNameAuthorship());
file.write("\t");
if (taxonomyItem.getCitation()!= null)
file.write(taxonomyItem.getCitation());
file.write("\t");
//kingdom
String kingdom = (String)hashTaxa.get("kingdom");
if (kingdom != null)
file.write(kingdom);
file.write("\t");
//phylum
String phylum = (String) hashTaxa.get("phylum");
if (phylum != null)
file.write(phylum);
file.write("\t");
//class
String claz = (String)hashTaxa.get("class");
if (claz != null)
file.write(claz);
file.write("\t");
//order
String order = (String)hashTaxa.get("order");
if (order != null)
file.write(order);
file.write("\t");
//family
String family = (String)hashTaxa.get("family");
if (family != null)
file.write(family);
file.write("\t");
//genus
String genus = (String)hashTaxa.get("genus");
if (genus != null)
file.write(genus);
file.write("\t");
//subgenus
String subgenus = (String)hashTaxa.get("subgenus");
if (subgenus != null)
file.write(subgenus);
file.write("\t");
//specificEpithet
if (name.length>1)
file.write(name[1]);
file.write("\t");
//infraspecificEpithet
if (name.length>2){
file.write(name[name.length-1]);
}
file.write("\t");
//verbatimTaxonRank
if (name.length>2){
file.write(name[name.length-2]);
}
file.write("\t");
//taxonRank
if (taxonomyItem.getRank()!= null)
file.write(taxonomyItem.getRank().toLowerCase());
file.write("\t");
//taxonomicStatus (accepted, synonym, unknown)
if (taxonomyItem.getStatus() != null)
file.write(taxonomyItem.getStatus().getStatus().toString().toLowerCase());
file.write("\t");
//modified
if (taxonomyItem.getModified() !=null){
DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
Date date = taxonomyItem.getModified().getTime();
String s = sdf.format(date);
file.write(s);
}
file.write("\t");
//source
if (taxonomyItem.getCredits() != null)
file.write(taxonomyItem.getCredits());
file.write("\t");
//taxonRemarks
if (taxonomyItem.getStatus() != null && taxonomyItem.getStatus().getStatusAsString() != null)
file.write(taxonomyItem.getStatus().getStatusAsString());
file.write("\t");
if (taxonomyItem.getLsid() != null)
file.write(taxonomyItem.getLsid());
file.write("\n");
//write vernacular names
if (taxonomyItem.getCommonNames()!= null){
createVernacularTxt(taxonomyItem.getId(), taxonomyItem.getCommonNames());
}
}
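/*
 * Illustrative shape of one taxa.txt row written above (tab-separated,
 * 22 columns; empty optional fields collapse into consecutive tabs;
 * the values are invented for the example):
 * 12345<TAB>12345<TAB>99<TAB>Gadus morhua<TAB>Linnaeus, 1758<TAB>...<TAB>species<TAB>accepted<TAB>2016-09-08<TAB>...
 */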
/**
* Insert line in taxa.txt
*/
private void writeLine(TaxonomyItem taxonomyItem){
BufferedWriter bufferedWriter =null;
try {
bufferedWriter = new BufferedWriter(new FileWriter(tempFolder + "/" + "taxa.txt", true));
internalWriter(taxonomyItem, bufferedWriter);
} catch (IOException e) {
logger.error("IO Error", e);
}finally{
try {
if (bufferedWriter!=null)
bufferedWriter.close();
} catch (IOException e) {
logger.error("error closing bufferedWriter",e);
}
}
}
/**
* Write VernacularName.txt
*/
private void createVernacularTxt(String id, List<CommonName> list){
try {
vernacularFile = new BufferedWriter(new FileWriter(tempFolder + "/" + "VernacularName.txt", true));
for (CommonName vernacular : list) {
// logger.trace("Vernacular name: " + vernacular.getName());
//taxonID
vernacularFile.write(id);
vernacularFile.write("\t");
//vernacularName
vernacularFile.write(vernacular.getName());
vernacularFile.write("\t");
//language
if (vernacular.getLanguage()!= null)
vernacularFile.write(vernacular.getLanguage());
vernacularFile.write("\t");
//locality
if (vernacular.getLocality()!= null)
vernacularFile.write(vernacular.getLocality());
vernacularFile.write("\n");
}
vernacularFile.close();
} catch (IOException e) {
logger.error("IO Error", e);
}
}
/**
* Create hashtable with taxonomy keys
*/
private void getTax(TaxonomyItem tax, Hashtable<String, String> taxa){
if(tax!=null)
if(tax.getRank()!=null && tax.getScientificName()!=null)
taxa.put((tax.getRank()).toLowerCase(), tax.getScientificName());
else
{
AnalysisLogger.getLogger().debug("in DWA generator, tax rank or SN are null");
}
else
{
AnalysisLogger.getLogger().debug("tax is null");
}
//writeLine(tax);
// logger.trace("insert parent " + tax.getId() + " " + tax.getScientificName());
if (tax.getParent()!=null)
getTax(tax.getParent(), taxa);
}
/**
* List files in directory
*/
private void getAllFiles(File dir) {
try {
File[] files = dir.listFiles();
for (File file : files) {
fileList.add(file);
if (file.isDirectory()) {
logger.trace("directory:" + file.getCanonicalPath());
getAllFiles(file);
} else {
logger.trace(" file:" + file.getCanonicalPath());
}
}
} catch (IOException e) {
logger.error("error creating files",e);
}
}
/**
* Create zip file
*/
private File writeZipFile(File directoryToZip) throws Exception {
File zipFile = new File(directoryToZip + "/" + archiveZip);
FileOutputStream fos = new FileOutputStream(zipFile);
ZipOutputStream zos = new ZipOutputStream(fos);
for (File file : fileList) {
if (!file.isDirectory()) { // we only zip files, not directories
addToZip(directoryToZip, file, zos);
}
}
zos.close();
fos.close();
return zipFile;
}
/**
* Add files to zip
*/
private void addToZip(File directoryToZip, File file, ZipOutputStream zos) throws FileNotFoundException,
IOException {
FileInputStream fis = new FileInputStream(file);
// we want the zipEntry's path to be a relative path that is relative
// to the directory being zipped, so chop off the rest of the path
String zipFilePath = file.getCanonicalPath().substring(directoryToZip.getCanonicalPath().length() + 1,
file.getCanonicalPath().length());
logger.trace("Writing '" + zipFilePath + "' to zip file");
ZipEntry zipEntry = new ZipEntry(zipFilePath);
zos.putNextEntry(zipEntry);
byte[] bytes = new byte[1024];
int length;
while ((length = fis.read(bytes)) >= 0) {
zos.write(bytes, 0, length);
}
zos.closeEntry();
fis.close();
}
/**
* Create file eml.xml
*/
public void createMetadata() throws IOException {
Calendar now = Calendar.getInstance();
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
File output = new File(tempFolder + "/eml.xml") ;
BufferedWriter bw = null;
try {
bw = new BufferedWriter(new FileWriter(output));
} catch (IOException e) {
logger.error("IO Error", e);
}
BufferedReader br = new BufferedReader(new InputStreamReader(MapDwCA.class.getResourceAsStream("/org/gcube/data/spd/dwca/eml.xml")));
String line;
while ((line = br.readLine()) != null) {
bw.write(line.replace("<pubDate></pubDate>", "<pubDate>" + format.format(now.getTime()) + "</pubDate>"));
bw.write('\n');
}
bw.close();
br.close();
}
}
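
A minimal usage sketch for MapDwCA, assuming a list of TaxonomyItem objects obtained elsewhere and a writable directory; note that the constructor argument is concatenated directly with "DwCA-folder", so it should end with a path separator:

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.gcube.data.spd.model.products.TaxonomyItem;
public class MapDwCAUsageSketch {
public static void main(String[] args) throws Exception {
List<TaxonomyItem> taxa = new ArrayList<TaxonomyItem>(); // assumed to be filled elsewhere
MapDwCA builder = new MapDwCA("/tmp/"); // illustrative directory, trailing separator required
File archive = builder.createDwCA(taxa.iterator());
System.out.println("DwC-A written to " + archive.getAbsolutePath());
}
}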

View File

@@ -1,507 +0,0 @@
package org.gcube.dataanalysis.JobSMspd;
import static org.gcube.data.spd.client.plugins.AbstractPlugin.manager;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.data.spd.client.proxies.Manager;
import org.gcube.data.spd.model.Conditions;
import org.gcube.data.spd.model.PluginDescription;
import org.gcube.data.spd.model.util.Capabilities;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.interfaces.StandardLocalExternalAlgorithm;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.gcube.dataanalysis.ecoengine.utils.DynamicEnum;
import org.hibernate.SessionFactory;
public class OccurencesProcedure extends StandardLocalExternalAlgorithm {
LinkedHashMap<String, StatisticalType> map = new LinkedHashMap<String, StatisticalType>();
static String databaseParameterName = "FishBase";
static String userParameterName = "user";
static String passwordParameterName = "password";
static String urlParameterName = "FishBase";
SessionFactory dbconnection = null;
// public static boolean call=false;
String tablename;
File outputResult;
String columnnames;
List<Object> speciesList = null;
protected String fileName;
String outputtablename;
String outputErrortablename;
String outputtable;
HashMap<String, String> dpHash = new HashMap<String, String>();
HashMap<String, String> dpUHash = new HashMap<String, String>();
HashMap<String, String> dpEHash = new HashMap<String, String>();
String tableError;
private static DataPenum dp = null;
private static ExtentionDPEnum dpE = null;
private static UnfoldDPEnum dpU = null;
private String dataProvider = "Data Provider :";
private String chosendataProvider = new String();
private String dataProviderExtention = "Data Provider (Expand Option):";
private String chosendataProviderExtention = new String();
// private String chosendataProviderUnfold="Data Provider Unfold:";
private String dataProviderUnfold = "Data Provider (Unfold Option):";
private String chosendataProviderUnfold = new String();
private Lock lock = new Lock();
@Override
public String getDescription() {
return "An Algorithm that retrieves the occurrences from a data provided based on the given search options";
}
@Override
public void init() throws Exception {
AnalysisLogger.getLogger().debug("SMFaoAlg");
AnalysisLogger.getLogger().debug("Init scope :"+ScopeProvider.instance.get());
}
public void fulfilParameters() throws IOException {
config.setParam("DatabaseDriver", "org.postgresql.Driver");
dbconnection = DatabaseUtils.initDBSession(config);
tablename = getInputParameter("SpeciesTable");
columnnames = getInputParameter("SpeciesColumns");
outputtablename = getInputParameter("OutputTableName");
outputtable = getInputParameter("OutputTable");
tableError = getInputParameter("ErrorTable");
chosendataProviderUnfold = getInputParameter(dataProviderUnfold);
chosendataProviderExtention = getInputParameter(dataProviderExtention);
chosendataProvider = getInputParameter(dataProvider);
outputErrortablename = getInputParameter("ErrorTableName");
String[] columnlist = columnnames.split(AlgorithmConfiguration
.getListSeparator());
speciesList = DatabaseFactory.executeSQLQuery("select " + columnlist[0]
+ " from " + tablename, dbconnection);
fileName = super.config.getPersistencePath() + "results.csv";
outputResult= new File(fileName);
}
@Override
protected void process() throws Exception {
try {
String scope = ScopeProvider.instance.get();
AnalysisLogger.getLogger().debug("process scope :"+scope);
AnalysisLogger.getLogger().debug(
"-------Procedure config scope"
+ config.getGcubeScope());
fulfilParameters();
createTables();
int length = speciesList.size() / 3;
ArrayList<String> chunk1 = new ArrayList<String>();
ArrayList<String> chunk2 = new ArrayList<String>();
ArrayList<String> chunk3 = new ArrayList<String>();
for (int i = 0; i < speciesList.size(); i++) {
if (i < length)
chunk1.add((String) speciesList.get(i));
if (i >= length && i <= 2 * length)
chunk2.add((String) speciesList.get(i));
if (i > 2 * length)
chunk3.add((String) speciesList.get(i));
}
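// speciesList has now been split into three roughly equal chunks, each
// handled by one of the three extraction threads started below.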
ThreadExtractionOccFromSPD t1 = new ThreadExtractionOccFromSPD(
chunk1, chosendataProvider, chosendataProviderExtention,
chosendataProviderUnfold, scope);
ThreadExtractionOccFromSPD t2 = new ThreadExtractionOccFromSPD(
chunk2, chosendataProvider, chosendataProviderExtention,
chosendataProviderUnfold, scope);
ThreadExtractionOccFromSPD t3 = new ThreadExtractionOccFromSPD(
chunk3, chosendataProvider, chosendataProviderExtention,
chosendataProviderUnfold, scope);
Thread th1 = new Thread(t1);
th1.start();
Thread th2 = new Thread(t2);
th2.start();
Thread th3 = new Thread(t3);
th3.start();
th1.join();
th2.join();
th3.join();
File[] files = new File[3];
if (t1.getInfo() != null)
files[0] = t1.getInfo();
if (t2.getInfo() != null)
files[1] = t2.getInfo();
if (t3.getInfo() != null)
files[2] = t3.getInfo();
mergeFiles(files, outputResult);
insertInTheTableErrors(t1.getErrors());
insertInTheTableErrors(t2.getErrors());
insertInTheTableErrors(t3.getErrors());
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().debug(e.toString());
throw e;
} finally {
DatabaseUtils.closeDBConnection(dbconnection);
}
}
private void createTables() throws Exception {
DatabaseFactory.executeSQLUpdate("create table " + tableError
+ " (error character varying)", dbconnection);
}
// private void insertInTheTable(ArrayList<ArrayList<String>> arrays)
// throws Exception {
//
// for (ArrayList<String> array : arrays) {
// // String query = "insert into " + outputtable + st + " values (";
// String writeString = new String();
// int i = 0;
//
// for (String s : array) {
// if (i != 0) {
// writeString = writeString + "; ";
// }
// writeString = writeString + " '";
// if (s != null)
// s = s.replace("'", "");
// writeString = writeString + s;
//
// writeString = writeString + "'";
// i++;
//
// }
// write(writeString);
// out.newLine();
//
// }
//
// }
private void insertInTheTableErrors(ArrayList<String> arrays)
throws Exception {
if (arrays != null) {
String st = " (error)";
for (String er : arrays) {
String query = "insert into " + tableError + st + " values ('"
+ er + "')";
AnalysisLogger.getLogger().debug("query error : " + query);
DatabaseFactory.executeSQLUpdate(query, dbconnection);
}
}
}
private void insertEnumValues() {
AnalysisLogger.getLogger().debug(" insertEnumValues");
AnalysisLogger.getLogger().debug(" second version");
if (dp == null || dpU == null || dpE == null) {
dp = new DataPenum();
dpE = new ExtentionDPEnum();
dpU = new UnfoldDPEnum();
try {
setDynamicParameter();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
AnalysisLogger.getLogger().debug(" call setDynamicParameter");
}
if (dp != null) {
try {
if (UnfoldDPEnumType.values().length == 0) {
AnalysisLogger.getLogger().debug("Only one provider.");
dp = new DataPenum();
dpE = new ExtentionDPEnum();
dpU = new UnfoldDPEnum();
setDynamicParameter();
}
} catch (Exception e) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
AnalysisLogger.getLogger().debug(sw.toString());
}
}
}
@Override
public void shutdown() {
AnalysisLogger.getLogger().debug("Shut down ");
}
private void setDynamicParameter() throws InterruptedException {
AnalysisLogger.getLogger().debug("Inside setDynamicParameter");
// ScopeProvider.instance.set("/gcube/devsec");
AnalysisLogger.getLogger().debug(
"-------Procedure setParameter in the scope"
+ ScopeProvider.instance.get().toString());
Manager manager = null;
manager = manager().build();
AnalysisLogger.getLogger().debug("build Manager");
AnalysisLogger.getLogger().debug("before dei plugin");
List<PluginDescription> plugin = null;
try {
plugin = manager.getPluginsDescription();
} catch (Exception e) {
String eTracMes = e.getMessage();
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
sw.toString();
AnalysisLogger.getLogger().debug(eTracMes);
AnalysisLogger.getLogger().debug(sw.toString());
e.printStackTrace();
} finally {
lock.lock();
dp.addEnum(DataProvidersType.class, "ALL");
dpE.addEnum(ExtentionDPType.class, "ALL");
dpU.addEnum(UnfoldDPEnumType.class, "NO OPTION");
dpE.addEnum(ExtentionDPType.class, "NO OPTION");
lock.unlock();
}
AnalysisLogger.getLogger().debug("get plugin");
if (plugin != null) {
AnalysisLogger.getLogger().debug(
"*****PluginDescription is NOT null - length: "
+ plugin.size());
for (int i = 0; i < plugin.size(); i++) {
PluginDescription pluginDescription = plugin.get(i);
AnalysisLogger.getLogger().debug(
"For plugin ***" + pluginDescription.getName());
Map<Capabilities, List<Conditions>> pluginCapabilities = pluginDescription
.getSupportedCapabilities();
AnalysisLogger.getLogger().debug("created maps");
AnalysisLogger.getLogger().debug(
" map size" + pluginCapabilities.size());
for (Entry<Capabilities, List<Conditions>> pluginCapability : pluginCapabilities
.entrySet()) {
Capabilities capability = pluginCapability.getKey();
String capabilityName = capability.name().toString();
AnalysisLogger.getLogger().debug(capabilityName);
if (capabilityName.equals("Unfold"))
dpU.addEnum(UnfoldDPEnumType.class, pluginDescription
.getName().toString());
if (capabilityName.equals("Expansion"))
dpE.addEnum(ExtentionDPType.class, pluginDescription
.getName().toString());
if (capabilityName.equals("Occurrence"))
dp.addEnum(DataProvidersType.class, pluginDescription
.getName().toString());
}
}
} else
AnalysisLogger.getLogger().debug("*****PluginDescription is null");
}
@Override
protected void setInputParameters() {
try {
AnalysisLogger.getLogger().debug("inside setInputParameters2 ");
addRemoteDatabaseInput(databaseParameterName, urlParameterName,
userParameterName, passwordParameterName, "driver",
"dialect");
List<TableTemplates> templates = new ArrayList<TableTemplates>();
templates.add(TableTemplates.GENERIC);
InputTable tinput = new InputTable(templates, "SpeciesTable",
"The table containing the species information");
ColumnTypesList columns = new ColumnTypesList("SpeciesTable",
"SpeciesColumns", "Select the columns for species name",
false);
addStringInput("OutputTableName", "The name of the output table",
"occ_");
addStringInput("ErrorTableName", "The name of the output table",
"err_");
ServiceType randomstring = new ServiceType(
ServiceParameters.RANDOMSTRING, "OutputTable", "", "occ");
ServiceType randomstringErr = new ServiceType(
ServiceParameters.RANDOMSTRING, "ErrorTable", "", "err");
insertEnumValues();
addEnumerateInput(DataProvidersType.values(), dataProvider,
"Choose Data Provider", "ALL");
AnalysisLogger.getLogger().debug("After DataProvidersType");
addEnumerateInput(ExtentionDPType.values(), dataProviderExtention,
"Choose Expand Option Data Provider", "ALL");
AnalysisLogger.getLogger().debug("After ExtentionDPType");
addEnumerateInput(UnfoldDPEnumType.values(), dataProviderUnfold,
"Choose Unfold Option Data Provider", "ALL");
AnalysisLogger.getLogger().debug("After UnfoldDPEnumType");
inputs.add(tinput);
inputs.add(columns);
inputs.add(randomstring);
inputs.add(randomstringErr);
DatabaseType.addDefaultDBPars(inputs);
} catch (Throwable e) {
e.printStackTrace();
AnalysisLogger.getLogger().debug(e.toString());
}
// call=true;
}
@Override
public StatisticalType getOutput() {
List<TableTemplates> outtemplate = new ArrayList<TableTemplates>();
outtemplate.add(TableTemplates.GENERIC);
List<TableTemplates> outtemplateErr = new ArrayList<TableTemplates>();
outtemplateErr.add(TableTemplates.GENERIC);
OutputTable outErr = new OutputTable(outtemplateErr, outputErrortablename,
tableError, "The output table containing the errors");
PrimitiveType f = new PrimitiveType(File.class.getName(), outputResult, PrimitiveTypes.FILE, "OccFile", "OccFile");
map.put("Output", f);
map.put("Errors", outErr);
PrimitiveType output = new PrimitiveType(HashMap.class.getName(), map,
PrimitiveTypes.MAP, "ResultsMap", "Results Map");
return output;
}
enum DataProvidersType {
}
class DataPenum extends DynamicEnum {
public Field[] getFields() {
Field[] fields = DataProvidersType.class.getDeclaredFields();
return fields;
}
}
enum ExtentionDPType {
}
class ExtentionDPEnum extends DynamicEnum {
public Field[] getFields() {
Field[] fields = ExtentionDPType.class.getDeclaredFields();
return fields;
}
}
enum UnfoldDPEnumType {
}
class UnfoldDPEnum extends DynamicEnum {
public Field[] getFields() {
Field[] fields = UnfoldDPEnumType.class.getDeclaredFields();
return fields;
}
}
public static void mergeFiles(File[] files, File mergedFile) {
AnalysisLogger.getLogger().debug("Inside mergeFiles");
if (mergedFile.exists()){
mergedFile.delete();
}
FileWriter fstream = null;
BufferedWriter out = null;
try {
fstream = new FileWriter(mergedFile, true);
out = new BufferedWriter(fstream);
} catch (IOException e1) {
e1.printStackTrace();
}
String title = "institutionCode, " + "collectionCode, "
+ "catalogueNumber, " + "dataSet, " + "dataProvider, "
+ "dataSource, " + "scientificNameAuthorship,"
+ "identifiedBy," + "credits," + "recordedBy, " + "eventDate, "
+ "modified, " + "scientificName, " + "kingdom, " + "family, "
+ "locality, " + "country, " + "citation, "
+ "decimalLatitude, " + "decimalLongitude, "
+ "coordinateUncertaintyInMeters, " + "maxDepth, "
+ "minDepth, " + "basisOfRecord";
try {
out.write(title);
out.newLine();
for (File f : files) {
if (f == null)
continue;
System.out.println("merging: " + f.getName());
FileInputStream fis;
fis = new FileInputStream(f);
BufferedReader in = new BufferedReader(new InputStreamReader(
fis));
String aLine;
while ((aLine = in.readLine()) != null) {
out.write(aLine);
out.newLine();
}
in.close();
}
} catch (IOException e) {
e.printStackTrace();
}
try {
out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
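
A short sketch of calling the public mergeFiles helper directly; the paths are illustrative, the merger writes the CSV header itself and skips null entries:

import java.io.File;
public class MergeFilesUsageSketch {
public static void main(String[] args) {
File[] parts = new File[3]; // illustrative chunk files produced by the three threads
parts[0] = new File("/tmp/chunk1.csv");
parts[1] = new File("/tmp/chunk2.csv");
parts[2] = new File("/tmp/chunk3.csv");
OccurencesProcedure.mergeFiles(parts, new File("/tmp/results.csv"));
}
}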

View File

@@ -1,469 +0,0 @@
package org.gcube.dataanalysis.JobSMspd;
import static org.gcube.data.spd.client.plugins.AbstractPlugin.manager;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.Map.Entry;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.data.spd.client.proxies.Manager;
import org.gcube.data.spd.model.Conditions;
import org.gcube.data.spd.model.PluginDescription;
import org.gcube.data.spd.model.products.TaxonomyItem;
import org.gcube.data.spd.model.util.Capabilities;
import org.gcube.dataanalysis.JobSMspd.TaxaProcedure.DataPenum;
import org.gcube.dataanalysis.JobSMspd.TaxaProcedure.DataProvidersType;
import org.gcube.dataanalysis.JobSMspd.TaxaProcedure.ExtentionDPEnum;
import org.gcube.dataanalysis.JobSMspd.TaxaProcedure.ExtentionDPType;
import org.gcube.dataanalysis.JobSMspd.TaxaProcedure.UnfoldDPEnum;
import org.gcube.dataanalysis.JobSMspd.TaxaProcedure.UnfoldDPEnumType;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.interfaces.StandardLocalExternalAlgorithm;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.gcube.dataanalysis.ecoengine.utils.DynamicEnum;
import org.hibernate.SessionFactory;
public class TaxaProcedure extends StandardLocalExternalAlgorithm {
LinkedHashMap<String, StatisticalType> map = new LinkedHashMap<String, StatisticalType>();
static String databaseParameterName = "FishBase";
static String userParameterName = "user";
static String passwordParameterName = "password";
static String urlParameterName = "FishBase";
SessionFactory dbconnection = null;
// public static boolean call=false;
String tablename;
String columnnames;
List<Object> speciesList = null;
// protected String fileName;
// BufferedWriter out;
String outputtablename;
String outputErrortablename;
String outputtable;
HashMap<String, String> dpHash = new HashMap<String, String>();
HashMap<String, String> dpUHash = new HashMap<String, String>();
HashMap<String, String> dpEHash = new HashMap<String, String>();
String tableError;
private static DataPenum dp = null;
private static ExtentionDPEnum dpE = null;
private static UnfoldDPEnum dpU = null;
private String dataProvider = "Data Provider :";
private String chosendataProvider = new String();
private String dataProviderExtention = "Data Provider (Expand Option):";
private String chosendataProviderExtention = new String();
// private String chosendataProviderUnfold="Data Provider Unfold:";
private String dataProviderUnfold = "Data Provider (Unfold Option):";
private String chosendataProviderUnfold = new String();
File file;
private Lock lock = new Lock();
@Override
public String getDescription() {
return " An Algorithm that retrieves the taxon from a data provided based on the given search options";
}
@Override
public void init() throws Exception {
AnalysisLogger.getLogger().debug("SMFaoAlg");
}
public void fulfilParameters() throws IOException {
config.setParam("DatabaseDriver", "org.postgresql.Driver");
dbconnection = DatabaseUtils.initDBSession(config);
tablename = getInputParameter("SpeciesTable");
columnnames = getInputParameter("SpeciesColumns");
outputtablename = getInputParameter("OutputTableName");
outputtable = getInputParameter("OutputTable");
tableError = getInputParameter("ErrorTable");
chosendataProviderUnfold = getInputParameter(dataProviderUnfold);
chosendataProviderExtention = getInputParameter(dataProviderExtention);
chosendataProvider = getInputParameter(dataProvider);
outputErrortablename = getInputParameter("ErrorTableName");
String[] columnlist = columnnames.split(AlgorithmConfiguration
.getListSeparator());
speciesList = DatabaseFactory.executeSQLQuery("select " + columnlist[0]
+ " from " + tablename, dbconnection);
}
@Override
protected void process() throws Exception {
try {
fulfilParameters();
createTables();
int length = speciesList.size() / 3;
ArrayList<String> chunk1 = new ArrayList<String>();
ArrayList<String> chunk2 = new ArrayList<String>();
ArrayList<String> chunk3 = new ArrayList<String>();
for (int i = 0; i < speciesList.size(); i++) {
if (i < length)
chunk1.add((String) speciesList.get(i));
if (i >= length && i <= 2 * length)
chunk2.add((String) speciesList.get(i));
if (i > 2 * length)
chunk3.add((String) speciesList.get(i));
}
String scope = ScopeProvider.instance.get();
ThreadExtractionTaxaFromSPD t1 = new ThreadExtractionTaxaFromSPD(
chunk1, chosendataProvider, chosendataProviderExtention,
chosendataProviderUnfold, scope);
ThreadExtractionTaxaFromSPD t2 = new ThreadExtractionTaxaFromSPD(
chunk2, chosendataProvider, chosendataProviderExtention,
chosendataProviderUnfold, scope);
ThreadExtractionTaxaFromSPD t3 = new ThreadExtractionTaxaFromSPD(
chunk3, chosendataProvider, chosendataProviderExtention,
chosendataProviderUnfold, scope);
Thread th1 = new Thread(t1);
th1.start();
Thread th2 = new Thread(t2);
th2.start();
Thread th3 = new Thread(t3);
th3.start();
th1.join();
th2.join();
th3.join();
AnalysisLogger.getLogger().debug("Thread finished");
Vector<TaxonomyItem> taxaList = t1.getTaxaList();
taxaList.addAll(t2.getTaxaList());
taxaList.addAll(t3.getTaxaList());
MapDwCA fileMaker = new MapDwCA(super.config.getPersistencePath());
Iterator<TaxonomyItem> it = taxaList.iterator();
file = fileMaker.createDwCA(it);
AnalysisLogger.getLogger().debug("DWA Created");
insertInTheTableErrors(t1.getErrors());
insertInTheTableErrors(t2.getErrors());
insertInTheTableErrors(t3.getErrors());
} catch (Exception e) {
e.printStackTrace();
throw e;
} finally {
DatabaseUtils.closeDBConnection(dbconnection);
}
}
private void createTables() throws Exception {
DatabaseFactory.executeSQLUpdate("create table " + tableError
+ " (error character varying)", dbconnection);
}
private void insertInTheTable(ArrayList<ArrayList<String>> arrays)
throws IOException {
for (ArrayList<String> array : arrays) {
// String query = "insert into " + outputtable + st + " values (";
String writeString = new String();
int i = 0;
for (String s : array) {
if (i != 0) {
writeString = writeString + "; ";
}
// query = query + ", ";}
writeString = writeString + " '";
// query = query + " '";
if (s != null)
s = s.replace("'", "");
writeString = writeString + s;
// query = query + s;
// query = query + "'";
writeString = writeString + "'";
i++;
}
}
}
private void insertInTheTableErrors(ArrayList<String> arrays)
throws Exception {
if (arrays != null) {
String st = " (error)";
for (String er : arrays) {
String query = "insert into " + tableError + st + " values ('"
+ er + "')";
AnalysisLogger.getLogger().debug("query error : " + query);
DatabaseFactory.executeSQLUpdate(query, dbconnection);
}
}
}
@Override
public void shutdown() {
AnalysisLogger.getLogger().debug("Shut down ");
}
private void insertEnumValues() {
AnalysisLogger.getLogger().debug(" insertEnumValues");
// if (dp == null || dpU == null || dpE == null) {
// dp = new DataPenum();
// dpE = new ExtentionDPEnum();
// dpU = new UnfoldDPEnum();
// AnalysisLogger.getLogger().debug(" call setDynamicParameter");
//
// setDynamicParameter();
//
// }
// if (dp != null) {
try {
if (UnfoldDPEnumType.values().length == 0) {
AnalysisLogger.getLogger().debug("Only one provider.");
dp = new DataPenum();
dpE = new ExtentionDPEnum();
dpU = new UnfoldDPEnum();
setDynamicParameter();
}
} catch (Exception e) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
AnalysisLogger.getLogger().debug(sw.toString());
}
// }
}
private void setDynamicParameter() {
AnalysisLogger.getLogger().debug("Inside setDynamicParameter");
AnalysisLogger.getLogger().debug(
"Procedure called in the scope"
+ ScopeProvider.instance.get().toString());
Manager manager = null;
manager = manager().build();
AnalysisLogger.getLogger().debug("build manager");
AnalysisLogger.getLogger().debug("before plugin");
List<PluginDescription> plugin = null;
try {
plugin = manager.getPluginsDescription();
} catch (Exception e) {
String eTracMes = e.getMessage();
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
AnalysisLogger.getLogger().debug(eTracMes);
AnalysisLogger.getLogger().debug(sw.toString());
} finally {
try {
lock.lock();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
dp.addEnum(DataProvidersType.class, "ALL");
dpE.addEnum(ExtentionDPType.class, "ALL");
dpU.addEnum(UnfoldDPEnumType.class, "NO OPTION");
dpE.addEnum(ExtentionDPType.class, "NO OPTION");
lock.unlock();
}
AnalysisLogger.getLogger().debug("get plugin");
if (plugin != null) {
AnalysisLogger.getLogger().debug(
"*****PluginDescription is NOT null - length: "
+ plugin.size());
for (int i = 0; i < plugin.size(); i++) {
PluginDescription pluginDescription = plugin.get(i);
AnalysisLogger.getLogger().debug(
"For plugin ***" + pluginDescription.getName());
Map<Capabilities, List<Conditions>> pluginCapabilities = pluginDescription
.getSupportedCapabilities();
AnalysisLogger.getLogger().debug("created maps");
AnalysisLogger.getLogger().debug(
" map size" + pluginCapabilities.size());
for (Entry<Capabilities, List<Conditions>> pluginCapability : pluginCapabilities
.entrySet()) {
Capabilities capability = pluginCapability.getKey();
String capabilityName = capability.name().toString();
AnalysisLogger.getLogger().debug(capabilityName);
if (capabilityName.equals("Unfold")) {
dpU.addEnum(UnfoldDPEnumType.class, pluginDescription
.getName().toString());
}
if (capabilityName.equals("Expansion")) {
dpE.addEnum(ExtentionDPType.class, pluginDescription
.getName().toString());
}
if (capabilityName.equals("Classification")) {
dp.addEnum(DataProvidersType.class, pluginDescription
.getName().toString());
}
}
}
} else
AnalysisLogger.getLogger().debug("*****PluginDescription is null");
}
@Override
protected void setInputParameters() {
try {
AnalysisLogger.getLogger().debug("inside setInputParameters ");
addRemoteDatabaseInput(databaseParameterName, urlParameterName,
userParameterName, passwordParameterName, "driver",
"dialect");
List<TableTemplates> templates = new ArrayList<TableTemplates>();
templates.add(TableTemplates.GENERIC);
InputTable tinput = new InputTable(templates, "SpeciesTable",
"The table containing the species information");
ColumnTypesList columns = new ColumnTypesList("SpeciesTable",
"SpeciesColumns", "Select the columns for species name",
false);
addStringInput("OutputTableName", "The name of the output table",
"occ_");
addStringInput("ErrorTableName", "The name of the output table",
"err_");
ServiceType randomstring = new ServiceType(
ServiceParameters.RANDOMSTRING, "OutputTable", "", "tax_");
ServiceType randomstringErr = new ServiceType(
ServiceParameters.RANDOMSTRING, "ErrorTable", "", "err");
AnalysisLogger.getLogger().debug("before setDynamicParameter() ");
// if(!call)
insertEnumValues();
addEnumerateInput(DataProvidersType.values(), dataProvider,
"Choose Data Provider", "ALL");
AnalysisLogger.getLogger().debug("After DataProvidersType");
addEnumerateInput(ExtentionDPType.values(), dataProviderExtention,
"Choose Expand Option Data Provider", "ALL");
AnalysisLogger.getLogger().debug("After ExtentionDPType");
addEnumerateInput(UnfoldDPEnumType.values(), dataProviderUnfold,
"Choose Unfold Option Data Provider", "ALL");
AnalysisLogger.getLogger().debug("After UnfoldDPEnumType");
inputs.add(tinput);
inputs.add(columns);
inputs.add(randomstring);
inputs.add(randomstringErr);
DatabaseType.addDefaultDBPars(inputs);
} catch (Throwable e) {
e.printStackTrace();
AnalysisLogger.getLogger().debug(e.toString());
}
// call=true;
}
@Override
public StatisticalType getOutput() {
List<TableTemplates> outtemplate = new ArrayList<TableTemplates>();
outtemplate.add(TableTemplates.GENERIC);
List<TableTemplates> outtemplateErr = new ArrayList<TableTemplates>();
outtemplateErr.add(TableTemplates.GENERIC);
// OutputTable out = new OutputTable(outtemplate, outputtablename,
// outputtable, "The output table containing all the matches");
OutputTable outErr = new OutputTable(outtemplateErr, outputErrortablename,
tableError, "The output table containing the errors");
PrimitiveType f = new PrimitiveType(File.class.getName(), file,
PrimitiveTypes.FILE, "OccFile", "OccFile");
map.put("Output", f);
// map.put("Output", out);
map.put("Errors", outErr);
PrimitiveType output = new PrimitiveType(HashMap.class.getName(), map,
PrimitiveTypes.MAP, "ResultsMap", "Results Map");
return output;
// return out;
}
// public void write(String writeSt) {
// try {
// out.write(writeSt);
//
// } catch (IOException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
//
// }
enum DataProvidersType {
}
class DataPenum extends DynamicEnum {
public Field[] getFields() {
Field[] fields = DataProvidersType.class.getDeclaredFields();
return fields;
}
}
enum ExtentionDPType {
}
class ExtentionDPEnum extends DynamicEnum {
public Field[] getFields() {
Field[] fields = ExtentionDPType.class.getDeclaredFields();
return fields;
}
}
enum UnfoldDPEnumType {
}
class UnfoldDPEnum extends DynamicEnum {
public Field[] getFields() {
Field[] fields = UnfoldDPEnumType.class.getDeclaredFields();
return fields;
}
}
}
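
A condensed sketch of the pipeline that process() implements above; the species name, scope and target directory are illustrative:

import java.io.File;
import java.util.ArrayList;
import org.gcube.common.scope.api.ScopeProvider;
public class TaxaPipelineSketch {
public static void main(String[] args) throws Exception {
ScopeProvider.instance.set("/gcube/devsec"); // illustrative scope
ArrayList<String> species = new ArrayList<String>();
species.add("Gadus morhua");
ThreadExtractionTaxaFromSPD worker = new ThreadExtractionTaxaFromSPD(
species, "ALL", "NO OPTION", "NO OPTION", ScopeProvider.instance.get());
Thread t = new Thread(worker);
t.start();
t.join();
MapDwCA fileMaker = new MapDwCA("/tmp/"); // the real algorithm passes config.getPersistencePath()
File dwca = fileMaker.createDwCA(worker.getTaxaList().iterator());
System.out.println("archive: " + dwca.getAbsolutePath());
}
}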

View File

@@ -1,232 +0,0 @@
package org.gcube.dataanalysis.JobSMspd;
import static org.gcube.data.spd.client.plugins.AbstractPlugin.manager;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.data.spd.client.proxies.Manager;
import org.gcube.data.spd.model.products.OccurrencePoint;
import org.gcube.data.spd.model.products.ResultElement;
import org.gcube.data.streams.Stream;
public class ThreadExtractionOccFromSPD implements Runnable {
private ArrayList<String> chunk;
private ArrayList<ArrayList<String>> informations;
private ArrayList<String> errors;
BufferedWriter out;
private String dataProvider;
private String dataProviderUnfold;
private String dataProviderExpand;
String scope;
private File tempFile;
public ThreadExtractionOccFromSPD(ArrayList<String> chunk,
String dataProvider, String dataProviderExpand,
String dataProviderUnfold, String scope) {
this.chunk = chunk;
this.dataProvider = dataProvider;
this.dataProviderExpand = dataProviderExpand;
this.dataProviderUnfold = dataProviderUnfold;
errors = new ArrayList<String>();
this.scope = scope;
}
public void run() {
AnalysisLogger.getLogger().debug("SCOPE " + scope);
try {
tempFile = File.createTempFile("chunk"
+ Thread.currentThread().getId(), ".csv");
out = new BufferedWriter(new FileWriter(tempFile, false));
} catch (Exception e) {
AnalysisLogger.getLogger().error(
"Error in the chunk file creation: " + e);
}
ScopeProvider.instance.set(scope);
Manager manager = null;
try {
manager = manager().build();
for (String species : chunk) {
if (species != null) {
String query = new String();
query = createQueryParameter(species);
AnalysisLogger.getLogger().debug("QUERY *******: " + query);
Stream<ResultElement> stream;
try {
stream = manager.search(query);
int i = 0;
while (stream.hasNext()) {
i++;
OccurrencePoint ti = (OccurrencePoint) stream
.next();
ArrayList<String> array = crateRowTable(ti);
insertInTheFile(array);
array = null;
System.gc();
}
if (i == 0) {
errors.add(species + " not found.");
}
} catch (Exception e) {
errors.add("Exception on " + species + " :"
+ e.getMessage());
e.printStackTrace();
}
}
}
out.close();
} catch (Throwable e) {
e.printStackTrace();
AnalysisLogger.getLogger().debug(
"An error occurred: " + e.getMessage());
}
}
private String createQueryParameter(String species) {
String query = "SEARCH BY SN '" + species + "'";
String where = new String();
String expand = new String();
String unfold = new String();
if (dataProvider.equals("ALL"))
where = "";
else
where = " IN " + dataProvider;
if (dataProviderUnfold.equals("NO OPTION"))
unfold = "";
else
unfold = " UNFOLD WITH " + dataProviderUnfold;
query = query + unfold;
AnalysisLogger.getLogger().debug("expand is : " + dataProviderExpand);
if (dataProviderExpand.equals("ALL")) {
expand = " EXPAND";
} else {
AnalysisLogger.getLogger().debug("inside else ");
if (dataProviderExpand.equals("NO OPTION"))
expand = "";
else
expand = " EXPAND WITH " + dataProviderExpand;
}
query = query + expand;
query = query + where;
query = query + " RETURN occurrence";
return query;
}
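/*
 * Examples of the queries assembled above (provider names are
 * illustrative):
 *   SEARCH BY SN 'Gadus morhua' EXPAND RETURN occurrence
 *   SEARCH BY SN 'Gadus morhua' IN GBIF RETURN occurrence
 *   SEARCH BY SN 'Gadus morhua' UNFOLD WITH CatalogueOfLife EXPAND RETURN occurrence
 */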
private ArrayList<String> crateRowTable(OccurrencePoint occurrence) {
ArrayList<String> infoOcc = new ArrayList<String>();
try{infoOcc.add(occurrence.getInstitutionCode().replace(",", " "));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getCollectionCode().replace(",", " "));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getCatalogueNumber().replace(",", " "));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getDataSet().getName());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getDataSet().getDataProvider().getName());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getProvider());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getScientificNameAuthorship());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getIdentifiedBy());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getCredits());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getRecordedBy());}catch(Exception e){infoOcc.add("");}
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
if (occurrence.getEventDate() == null
|| sdf.format(occurrence.getEventDate().getTime()).length() == 0)
infoOcc.add("");
else
try{infoOcc.add(sdf.format(occurrence.getEventDate().getTime()));}catch(Exception e){infoOcc.add("");}
if (occurrence.getModified() == null
|| sdf.format(occurrence.getModified().getTime()).length() == 0)
infoOcc.add("");
else
try{infoOcc.add(sdf.format(occurrence.getModified().getTime()));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getScientificName());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getKingdom());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getFamily());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getLocality());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getCountry());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getCitation());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(Double.toString(occurrence.getDecimalLatitude()));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(Double.toString(occurrence.getDecimalLongitude()));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getCoordinateUncertaintyInMeters());}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(Double.toString(occurrence.getMaxDepth()));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(Double.toString(occurrence.getMinDepth()));}catch(Exception e){infoOcc.add("");}
try{infoOcc.add(occurrence.getBasisOfRecord().toString());}catch(Exception e){infoOcc.add("");}
return infoOcc;
}
private void insertInTheFile(ArrayList<String> array) throws Exception {
// AnalysisLogger.getLogger().debug("INSIDE insertInTheFile");
// String query = "insert into " + outputtable + st + " values (";
String writeString = new String();
int i = 0;
for (String s : array) {
if (i == array.size() - 1) {
if (s == null)
writeString = writeString + " ";
else {
writeString = writeString + s.replace(",", " ");
}
} else if (s == null)
{
writeString = writeString + " ,";
// indexes 10 and 11 are the timestamp fields, which need an extra empty value
if (i == 10 || i == 11)
writeString = writeString + ",";
}
else {
writeString = writeString + s.replace(",", " ") + ",";
}
i++;
}
// AnalysisLogger.getLogger().debug("record is "+writeString);
write(writeString);
out.newLine();
}
private void write(String writeSt) {
try {
out.write(writeSt);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public File getInfo() {
return tempFile;
}
public ArrayList<String> getErrors() {
return errors;
}
}
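
A minimal sketch of running one occurrence-extraction thread on its own; the provider options and scope are illustrative (the enclosing algorithm passes the user's choices):

import java.io.File;
import java.util.ArrayList;
public class OccThreadUsageSketch {
public static void main(String[] args) throws Exception {
ArrayList<String> names = new ArrayList<String>();
names.add("Gadus morhua");
ThreadExtractionOccFromSPD worker = new ThreadExtractionOccFromSPD(
names, "ALL", "NO OPTION", "NO OPTION", "/gcube/devsec"); // illustrative scope
Thread t = new Thread(worker);
t.start();
t.join();
File csv = worker.getInfo(); // headerless CSV chunk; the header is added by mergeFiles
System.out.println("chunk written to " + csv);
}
}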

View File

@@ -1,168 +0,0 @@
package org.gcube.dataanalysis.JobSMspd;
import static org.gcube.data.spd.client.plugins.AbstractPlugin.manager;
import java.util.ArrayList;
import java.util.Vector;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.data.spd.client.proxies.Manager;
import org.gcube.data.spd.model.products.OccurrencePoint;
import org.gcube.data.spd.model.products.ResultElement;
import org.gcube.data.spd.model.products.TaxonomyItem;
import org.gcube.data.streams.Stream;
public class ThreadExtractionTaxaFromSPD implements Runnable {
private ArrayList<String> chunk;
private ArrayList<ArrayList<String>> informations;
private ArrayList<String> errors;
private String dataProvider;
private String dataProviderUnfold;
private String dataProviderExpand;
Vector <TaxonomyItem> taxaList= new Vector <TaxonomyItem>();
String scope;
public ThreadExtractionTaxaFromSPD(ArrayList<String> chunk, String dataProvider,String dataProviderExpand,String dataProviderUnfold ,String scope) {
this.chunk = chunk;
for (String species : chunk) {
System.out.println(species);
// AnalysisLogger.getLogger().debug(species);
}
this.dataProvider=dataProvider;
this.dataProviderExpand= dataProviderExpand;
this.dataProviderUnfold=dataProviderUnfold;
informations = new ArrayList<ArrayList<String>>();
errors= new ArrayList<String>();
this.scope=scope;
}
public void run() {
AnalysisLogger.getLogger().debug("SCOPE *******: "+scope);
ScopeProvider.instance.set(scope);
//ScopeProvider.instance.set("/gcube/devsec");
Manager manager=null;
try{
manager = manager().build();
for (String species : chunk) {
if (species != null) {
String query = new String();
// if(dataProviderExpand.equals("NO OPTION"))
// query= "SEARCH BY SN '"+species + "' RETURN occurrence";
// else
// query= "SEARCH BY SN '"+species + "' EXPAND WITH CatalogueOfLife RETURN occurrence";
query=createQueryParameter(species);
System.out.println("QUERY *******: "+query);
AnalysisLogger.getLogger().debug("QUERY *******: "+query);
Stream<ResultElement> stream;
try {
stream = manager.search(query);
int i=0;
while (stream.hasNext()) {
i++;
TaxonomyItem ti = (TaxonomyItem) stream.next();
// AnalysisLogger.getLogger().debug("Inside whiele: "+ti.toString());
taxaList.add(ti);
informations.add(createRowTable(ti));
}
if(i==0)
{
AnalysisLogger.getLogger().debug(species+" not found.");
errors.add(species+" not found.");
}
} catch (Exception e) {
errors.add("Exception on "+species+" :"+ e.getMessage());
AnalysisLogger.getLogger().debug("Exception on "+species+" :"+ e.getMessage());
e.printStackTrace();
}
}
}
}catch(Throwable e){
e.printStackTrace();
AnalysisLogger.getLogger().debug("An error occurred: "+e.getMessage());
}
}
private String createQueryParameter(String species)
{
String query= "SEARCH BY SN '"+species +"'";
String where=new String();
String expand=new String();
String unfold=new String();
if(dataProvider.equals("ALL"))
where="";
else
where=" IN "+dataProvider;
if(dataProviderUnfold.equals("NO OPTION"))
unfold="";
else
unfold=" UNFOLD WITH "+dataProviderUnfold;
query= query +unfold;
if(dataProviderExpand.equals("ALL"))
expand=" EXPAND";
else{
if(dataProviderExpand.equals("NO OPTION"))
expand="";
else
expand=" EXPAND WITH "+dataProviderExpand;
}
query= query+ expand;
//if(!expand.equals("")& !dataProviderExpand.equals("NO OPTION") )
query=query+ where;
query= query +" RETURN TAXON";
return query;
}
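/*
 * Same grammar as the occurrence query but ending in RETURN TAXON, e.g.
 * (cf. the commented query above; the provider name is illustrative):
 *   SEARCH BY SN 'Gadus morhua' EXPAND WITH CatalogueOfLife RETURN TAXON
 */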
private ArrayList<String> crateRowTable(TaxonomyItem p)
{
ArrayList<String> infoOcc= new ArrayList<String>();
infoOcc.add(p.getScientificName());
infoOcc.add(p.getScientificNameAuthorship());
infoOcc.add(p.getCitation());
infoOcc.add(p.getCredits());
infoOcc.add(p.getId());
infoOcc.add(p.getLsid());
infoOcc.add(p.getProvider());
infoOcc.add(p.getRank());
return infoOcc;
}
public ArrayList<ArrayList<String>> getInfo()
{
return informations;
}
public ArrayList<String> getErrors()
{
return errors;
}
public Vector<TaxonomyItem >getTaxaList()
{
return taxaList;
}
}

View File

@@ -1,24 +0,0 @@
package org.gcube.dataanalysis.fin.taxamatch;
import org.gcube.dataanalysis.taxamatch.fin.func_Taxamatch;
public class SimpleTest {
public static void main(String[] args) throws Exception{
func_Taxamatch func = new func_Taxamatch();
String EQUAL = "EQUAL";
String genus = "Gadus";
String species = "morhua";
String ip = "biodiversity.db.i-marine.research-infrastructures.eu";
String user = "postgres";
String password = "0b1s@d4sc13nc3";
String db = "fishbase";
String[] matches = func.func_Taxamatch(genus, species, EQUAL, EQUAL, ip, user, password, db);
System.out.println("Match: "+matches[0]);
}
}

View File

@@ -1,204 +0,0 @@
package org.gcube.dataanalysis.fin.taxamatch;
import java.util.ArrayList;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.interfaces.StandardLocalExternalAlgorithm;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.gcube.dataanalysis.taxamatch.fin.func_Taxamatch;
import org.hibernate.SessionFactory;
public class TaxaMatchListTransducer extends StandardLocalExternalAlgorithm {
public static enum operators {
EQUAL, NOT_EQUAL, CONTAINS, BEGINS_WITH, ENDS_WITH
};
static String GenusOperator = "ComparisonOperatorforGenus";
static String SpeciesOperator = "ComparisonOperatorforSpecies";
static String Genus = "Genus";
static String Species = "Species";
static String databaseParameterName = "FishBase";
static String userParameterName = "user";
static String passwordParameterName = "password";
static String urlParameterName = "FishBase";
String outputtablename;
String outputtable;
@Override
public String getDescription() {
return "An algorithm for Taxa Matching with respect to the Fishbase database";
}
@Override
public void init() throws Exception {
}
@Override
protected void process() throws Exception {
SessionFactory dbconnection = null;
try{
System.out.println("taxa->USING THE FOLLOWING PARAMETERS FOR DB:");
System.out.println("taxa->driver:"+config.getParam("DatabaseDriver"));
System.out.println("taxa->url:"+config.getParam("DatabaseURL"));
System.out.println("taxa->user:"+config.getParam("DatabaseUserName"));
System.out.println("taxa->password:"+config.getParam("DatabasePassword"));
config.setParam("DatabaseDriver","org.postgresql.Driver");
dbconnection = DatabaseUtils.initDBSession(config);
String tablename = getInputParameter("TaxaTable");
String columnnames = getInputParameter("TaxaColumns");
outputtablename = getInputParameter("OutputTableName");
outputtable = getInputParameter("OutputTable");
String genusOperator = getInputParameter(GenusOperator);
String speciesOperator = getInputParameter(SpeciesOperator);
System.out.println("taxa->got input parameters");
String databaseJdbc = getInputParameter(urlParameterName).replace("//", "");
int separator = databaseJdbc.lastIndexOf("/");
if (separator<0){
log("Bad database URL: "+databaseJdbc);
return;
}
System.out.println("taxa->got DB parameters "+databaseJdbc);
String databaseIP = databaseJdbc.substring(0,separator);
String databaseName = databaseJdbc.substring(separator+1);
String databaseUser = getInputParameter(userParameterName);
String databasePwd = getInputParameter(passwordParameterName);
// databaseIP = "biodiversity.db.i-marine.research-infrastructures.eu";
// databaseUser = "postgres";
// databasePwd = "0b1s@d4sc13nc3";
// databaseName = "fishbase";
System.out.println("taxa->Fishbase Database Parameters to use: "+databaseIP+" "+databaseName+" "+databaseUser+" "+databasePwd);
//end inputs recover
String[] columnlist = columnnames.split(AlgorithmConfiguration.getListSeparator());
System.out.println("taxa->got columns: "+columnlist[0]+" and "+columnlist[1]);
System.out.println("taxa->Selecting genus ");
List<Object> genusList = DatabaseFactory.executeSQLQuery("select "+columnlist[0]+" from "+tablename, dbconnection);
System.out.println("taxa->Selecting species");
List<Object> speciesList = DatabaseFactory.executeSQLQuery("select "+columnlist[1]+" from "+tablename, dbconnection);
System.out.println("taxa->creating table "+"create table "+outputtable+" (scientific_name character varying, value real)");
DatabaseFactory.executeSQLUpdate("create table "+outputtable+" (scientific_name character varying, value real)", dbconnection);
// TODO: the real per-row matching loop was never implemented here; a hedged
// sketch of what it could look like is given after this class. The insert
// below is a hard-coded placeholder used to smoke-test the output table.
System.out.println("taxa->inserting into table "+"insert into "+outputtable+" (scientific_name,value) values ('Gadus morhua', 3)");
DatabaseFactory.executeSQLUpdate("insert into "+outputtable+" (scientific_name,value) values ('Gadus morhua', 3)", dbconnection);
}catch(Exception e){
e.printStackTrace();
throw e;
}
finally{
DatabaseUtils.closeDBConnection(dbconnection);
}
}
/**
 * Stub carried over from TaxaMatchTransducer: the body is commented out
 * because it references per-pair fields (genus, species, operators and
 * database coordinates) that this list-based class does not define.
 */
private String doTaxaMatch(){
/*
log("Computing matching for " + genus + " " + species);
log("With operators: " + genusOperator + " " + speciesOperator);
if ((genus == null) || (species == null)) {
log("Void input");
addOutputString("Number of Matches", "0");
} else {
func_Taxamatch func = new func_Taxamatch();
AnalysisLogger.getLogger().trace("TaxaMatcher Initialized");
String[] matches = func.func_Taxamatch(genus, species, genusOperator, speciesOperator, databaseIP, databaseUser, databasePwd, databaseName);
if ((matches == null) || (matches.length == 0)) {
log("No match");
addOutputString("Number of Matches", "0");
} else {
log("Found " + matches[0] + " matches");
addOutputString("Number of Matches", matches[0]);
String[] speciesn = matches[1].split("\n");
if (Integer.parseInt(matches[0]) > 0) {
for (int i = 0; i < speciesn.length; i++) {
addOutputString("Match " + (i + 1), speciesn[i].trim());
}
}
}
}
log(outputParameters);
*/
return null;
}
@Override
public void shutdown() {
}
@Override
protected void setInputParameters() {
addEnumerateInput(operators.values(), GenusOperator, "Comparison Operator for Genus", "" + operators.EQUAL);
addEnumerateInput(operators.values(), SpeciesOperator, "Comparison Operator for Species", "" + operators.EQUAL);
addRemoteDatabaseInput(databaseParameterName,urlParameterName,userParameterName,passwordParameterName,"driver","dialect");
List<TableTemplates> templates = new ArrayList<TableTemplates>();
templates.add(TableTemplates.GENERIC);
InputTable tinput = new InputTable(templates,"TaxaTable","The table containing the taxa information");
ColumnTypesList columns = new ColumnTypesList ("TaxaTable","TaxaColumns", "Select the columns for genus and species", false);
addStringInput("OutputTableName", "The name of the output table", "taxa_");
ServiceType randomstring = new ServiceType(ServiceParameters.RANDOMSTRING, "OutputTable","","taxa");
inputs.add(tinput);
inputs.add(columns);
inputs.add(randomstring);
DatabaseType.addDefaultDBPars(inputs);
}
@Override
public StatisticalType getOutput() {
List<TableTemplates> outtemplate = new ArrayList<TableTemplates>();
outtemplate.add(TableTemplates.GENERIC);
OutputTable out = new OutputTable(outtemplate, outputtablename, outputtable, "The output table containing all the matches");
return out;
}
}
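A hedged sketch (not part of the original source) of the matching loop that the TODO placeholder in process() stands for: run func_Taxamatch once per input row and store one result row per pair. It reuses the func_Taxamatch call signature shown in TaxaMatchTransducer below; the meaning of the output "value" column (assumed here to be the match count) and the Object[] shape of multi-column query results are assumptions, not confirmed by the source.

// Hypothetical helper, not in the original source. All parameters are the
// locals already computed in process().
private void matchAll(String tablename, String[] columnlist, String outputtable,
		String genusOperator, String speciesOperator, String databaseIP,
		String databaseName, String databaseUser, String databasePwd,
		SessionFactory dbconnection) throws Exception {
	// A single two-column select keeps genus and species aligned per row;
	// the two independent selects used in process() do not guarantee that.
	List<Object> rows = DatabaseFactory.executeSQLQuery(
			"select " + columnlist[0] + "," + columnlist[1] + " from " + tablename, dbconnection);
	func_Taxamatch func = new func_Taxamatch();
	for (Object row : rows) {
		// Assumption: multi-column native queries come back as Object[] rows,
		// as is typical for Hibernate-backed query factories.
		Object[] pair = (Object[]) row;
		String genus = "" + pair[0];
		String species = "" + pair[1];
		String[] matches = func.func_Taxamatch(genus, species, genusOperator,
				speciesOperator, databaseIP, databaseUser, databasePwd, databaseName);
		// Assumption: "value" holds the number of matches, as the hard-coded
		// test insert in process() suggests.
		int nMatches = (matches == null || matches.length == 0) ? 0 : Integer.parseInt(matches[0]);
		DatabaseFactory.executeSQLUpdate("insert into " + outputtable
				+ " (scientific_name,value) values ('"
				+ (genus + " " + species).replace("'", "''") + "', " + nMatches + ")", dbconnection);
	}
}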

@ -1,100 +0,0 @@
package org.gcube.dataanalysis.fin.taxamatch;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.interfaces.StandardLocalExternalAlgorithm;
import org.gcube.dataanalysis.taxamatch.fin.func_Taxamatch;
public class TaxaMatchTransducer extends StandardLocalExternalAlgorithm {
public static enum operators {
EQUAL, NOT_EQUAL, CONTAINS, BEGINS_WITH, ENDS_WITH
};
static String GenusOperator = "ComparisonOperatorforGenus";
static String SpeciesOperator = "ComparisonOperatorforSpecies";
static String Genus = "Genus";
static String Species = "Species";
static String databaseParameterName = "FishBase";
static String userParameterName = "user";
static String passwordParameterName = "password";
static String urlParameterName = "FishBase";
@Override
public String getDescription() {
return "An algorithm for Taxa Matching with respect to the Fishbase database";
}
@Override
public void init() throws Exception {
}
@Override
protected void process() throws Exception {
String genus = getInputParameter(Genus);
String species = getInputParameter(Species);
String genusOperator = getInputParameter(GenusOperator);
String speciesOperator = getInputParameter(SpeciesOperator);
/*
String databaseIP = "biodiversity.db.i-marine.research-infrastructures.eu";
String databaseUser = "postgres";
String databasePwd = "0b1s@d4sc13nc3";
String databaseName = "fishbase";
*/
String databaseJdbc = getInputParameter(urlParameterName).replace("//", "");
int separator = databaseJdbc.lastIndexOf("/");
if (separator<0){
log("Bad database URL: "+databaseJdbc);
addOutputString("Number of Matches", "0");
return;
}
String databaseIP = databaseJdbc.substring(0,separator);
String databaseName = databaseJdbc.substring(separator+1);
String databaseUser = getInputParameter(userParameterName);
String databasePwd = getInputParameter(passwordParameterName);
log("Database Parameters to use: "+databaseIP+" "+databaseName+" "+databaseUser+" "+databasePwd);
log("Computing matching for " + genus + " " + species);
log("With operators: " + genusOperator + " " + speciesOperator);
if ((genus == null) || (species == null)) {
log("Void input");
addOutputString("Number of Matches", "0");
} else {
func_Taxamatch func = new func_Taxamatch();
AnalysisLogger.getLogger().trace("TaxaMatcher Initialized");
String[] matches = func.func_Taxamatch(genus, species, genusOperator, speciesOperator, databaseIP, databaseUser, databasePwd, databaseName);
if ((matches == null) || (matches.length == 0)) {
log("No match");
addOutputString("Number of Matches", "0");
} else {
log("Found " + matches[0] + " matches");
addOutputString("Number of Matches", matches[0]);
String[] speciesn = matches[1].split("\n");
if (Integer.parseInt(matches[0]) > 0) {
for (int i = 0; i < speciesn.length; i++) {
addOutputString("Match " + (i + 1), speciesn[i].trim());
}
}
}
}
log(outputParameters);
}
@Override
public void shutdown() {
}
@Override
protected void setInputParameters() {
addStringInput(Genus, "Genus of the species", "Gadus");
addStringInput(Species, "Species", "morhua");
addEnumerateInput(operators.values(), GenusOperator, "Comparison Operator for Genus", "" + operators.EQUAL);
addEnumerateInput(operators.values(), SpeciesOperator, "Comparison Operator for Species", "" + operators.EQUAL);
addRemoteDatabaseInput(databaseParameterName,urlParameterName,userParameterName,passwordParameterName,"driver","dialect");
}
}
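For comparison with the list-based test below, a minimal configuration sketch for this single-pair transducer. The agent name "FIN_TAXA_MATCH" is an assumption inferred from the "FIN_TAXA_MATCH_LIST" agent used by the list variant; the parameter names come from setInputParameters() above, and the real database credentials are omitted.

// Hypothetical test, modeled on TestFinTaxaMatchList below; not in the
// original repository.
package org.gcube.dataanalysis.test;

import java.util.List;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.interfaces.ComputationalAgent;
import org.gcube.dataanalysis.ecoengine.processing.factories.TransducerersFactory;
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;

public class TestFinTaxaMatchSingle {
	public static void main(String[] args) throws Exception {
		AlgorithmConfiguration config = Regressor.getConfig();
		config.setConfigPath("./cfg/");
		config.setAgent("FIN_TAXA_MATCH"); // hypothetical agent name
		config.setParam("FishBase", "//biodiversity.db.i-marine.research-infrastructures.eu/fishbase");
		config.setParam("user", "postgres");
		config.setParam("password", "***"); // real credentials omitted
		config.setParam("Genus", "Gadus");
		config.setParam("Species", "morhua");
		config.setParam("ComparisonOperatorforGenus", "EQUAL");
		config.setParam("ComparisonOperatorforSpecies", "EQUAL");
		List<ComputationalAgent> trans = TransducerersFactory.getTransducerers(config);
		trans.get(0).init();
		Regressor.process(trans.get(0)); // matches are emitted via addOutputString(...)
	}
}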

@ -1,69 +0,0 @@
package org.gcube.dataanalysis.test;
import java.util.List;
import java.util.UUID;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.interfaces.ComputationalAgent;
import org.gcube.dataanalysis.ecoengine.processing.factories.TransducerersFactory;
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;
public class TestFinTaxaMatchList {
public static void main(String[] args) throws Exception {
System.out.println("TEST 1");
List<ComputationalAgent> trans = null;
trans = TransducerersFactory.getTransducerers(testConfig());
trans.get(0).init();
Regressor.process(trans.get(0));
StatisticalType st = trans.get(0).getOutput();
trans = null;
}
private static AlgorithmConfiguration testConfig() {
AlgorithmConfiguration config = Regressor.getConfig();
config.setConfigPath("./cfg/");
config.setPersistencePath("./");
String databaseURL = "//biodiversity.db.i-marine.research-infrastructures.eu/fishbase";
String databaseUser = "postgres";
String databasePwd = "0b1s@d4sc13nc3";
config.setParam("FishBase", databaseURL);
config.setParam("user", databaseUser);
config.setParam("password", databasePwd);
config.setAgent("FIN_TAXA_MATCH_LIST");
config.setParam("ComparisonOperatorforGenus", "EQUAL");
config.setParam("ComparisonOperatorforSpecies", "EQUAL");
config.setParam("DatabaseUserName","utente");
config.setParam("DatabasePassword","d4science");
config.setParam("DatabaseURL","jdbc:postgresql://dbtest.research-infrastructures.eu/testdb");
config.setParam("DatabaseDriver","org.postgresql.Driver");
config.setParam("TaxaTable","generic_id1098fa80_aa83_4441_8ff1_28c4b8e09630");
config.setParam("TaxaColumns","genus"+AlgorithmConfiguration.listSeparator+"species");
String tablename = "testtaxa"+(UUID.randomUUID());
config.setParam("OutputTableName","Test Casey");
config.setParam("OutputTable", tablename.replace("-", ""));
return config;
}
}
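A hedged sketch (not in the original repository) for inspecting the randomly named output table created by the test above. The JDBC coordinates mirror the DatabaseURL/DatabaseUserName/DatabasePassword set in testConfig(); the table name is whatever "OutputTable" value was generated at run time, so it is passed in as an argument, and the (scientific_name, value) schema is the one created by TaxaMatchListTransducer.process().

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class InspectTaxaMatchOutput {
	public static void main(String[] args) throws Exception {
		String tablename = args[0]; // the random "OutputTable" name produced by the test
		try (Connection c = DriverManager.getConnection(
				"jdbc:postgresql://dbtest.research-infrastructures.eu/testdb",
				"utente", "d4science");
			 Statement s = c.createStatement();
			 ResultSet rs = s.executeQuery("select scientific_name, value from " + tablename)) {
			while (rs.next())
				System.out.println(rs.getString(1) + " -> " + rs.getFloat(2));
		}
	}
}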