Branching to create a new version that depends on the new SmartExecutor

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngineSmartExecutor@112013 82a268e6-3cf1-43bd-a215-b396298e98cf
Luca Frosini 9 years ago
commit 8c8d1c3167

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>

@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>EcologicalEngineExecutor</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>

@@ -0,0 +1,4 @@
#Fri Jun 22 18:05:41 CEST 2012
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding/<project>=UTF-8

@@ -0,0 +1,13 @@
#Fri Jun 22 18:05:41 CEST 2012
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.6
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.6

@@ -0,0 +1,5 @@
#Fri Jun 22 17:51:31 CEST 2012
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

@@ -0,0 +1,17 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">1</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@@ -0,0 +1,5 @@
#!/bin/sh
# AQUAMAPS_SUITABLE
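# usage (illustrative): sh <this-script> <working-dir> <algorithm-argument>
#   $1 = directory containing the jars referenced in the classpath below; $2 = first argument passed to AquamapsSuitableNode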
cd $1
java -Xmx1024M -classpath ./:./aquamapsnode.jar:./c3p0-0.9.1.2.jar:./commons-collections-3.1.jar:./dom4j-1.6.1.jar:./ecologicalDataMining.jar:./hibernate3.jar:./jaxen-1.1.2.jar:./jta-1.1.jar:./log4j-1.2.16.jar:./postgresql-8.4-702.jdbc4.jar:./slf4j-api-1.6.0.jar:./slf4j-log4j12-1.6.0.jar:./xpp3_min-1.1.4c.jar:./xstream-1.3.1.jar org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitableNode $2 execution.output

@@ -0,0 +1,696 @@
##--------------------------------------------------------
## CMSY analysis with estimation of total biomass, including Bayesian Schaefer
## written by Rainer Froese with support from Gianpaolo Coro in 2013-2014
## This version adjusts biomass to average biomass over the year
## It also contains the FutureCrash option to improve prediction of final biomass
## Version 21 adds the purple point to indicate the 25th percentile of final biomass
## Version 22 accepts that no biomass or CPUE data are available
##--------------------------------------------------------
library(R2jags) # Interface with JAGS
library(coda)
#-----------------------------------------
# Some general settings
#-----------------------------------------
# set.seed(999) # use for comparing results between runs
rm(list=ls(all=TRUE)) # clear previous variables etc
options(digits=3) # displays all numbers with three significant digits as default
graphics.off() # close graphics windows from previous sessions
#-----------------------------------------
# General settings for the analysis
#-----------------------------------------
sigR <- 0.02 # overall process error; 0.05 works reasonably well for simulations, 0.02 for real data; 0 if deterministic model
n <- 10000 # initial number of r-k pairs
batch.mode <- T # set to TRUE to suppress graphs
write.output <- T # set to true if table of output is wanted
FutureCrash <- "No"
#-----------------------------------------
# Start output to screen
#-----------------------------------------
cat("-------------------------------------------\n")
cat("Catch-MSY Analysis,", date(),"\n")
cat("-------------------------------------------\n")
#------------------------------------------
# Read data and assign to vectors
#------------------------------------------
# filename_1 <- "AllStocks_Catch4.csv"
# filename_2 <- "AllStocks_ID4.csv"
# filename_1 <- "SimCatch.csv"
# filename_2 <- "SimSpec.csv"
# filename_2 <- "SimSpecWrongS.csv"
# filename_2 <- "SimSpecWrongI.csv"
# filename_2 <- "SimSpecWrongF.csv"
# filename_2 <- "SimSpecWrongH.csv"
# filename_2 <- "SimSpecWrongL.csv"
# filename_1 <- "FishDataLim.csv"
# filename_2 <- "FishDataLimSpec.csv"
filename_1 <- "WKLIFE4Stocks.csv"
filename_2 <- "WKLIFE4ID.csv"
outfile<-"outfile"
outfile.txt <- "outputfile.txt"
cdat <- read.csv(filename_1, header=T, dec=".", stringsAsFactors = FALSE)
cinfo <- read.csv(filename_2, header=T, dec=".", stringsAsFactors = FALSE)
cat("Files", filename_1, ",", filename_2, "read successfully","\n")
# Stocks with total biomass data and catch data from StartYear to EndYear
# stocks <- sort(as.character(cinfo$stock)) # All stocks
stocks<-"HLH_M07"
# select one stock after the other
for(stock in stocks) {
# assign data from cinfo to vectors
res <- as.character(cinfo$Resilience[cinfo$stock==stock])
StartYear <- as.numeric(cinfo$StartYear[cinfo$stock==stock])
EndYear <- as.numeric(cinfo$EndYear[cinfo$stock==stock])
r_low <- as.numeric(cinfo$r_low[cinfo$stock==stock])
r_hi <- as.numeric(cinfo$r_hi[cinfo$stock==stock])
stb_low <- as.numeric(cinfo$stb_low[cinfo$stock==stock])
stb_hi <- as.numeric(cinfo$stb_hi[cinfo$stock==stock])
intyr <- as.numeric(cinfo$intyr[cinfo$stock==stock])
intbio_low <- as.numeric(cinfo$intbio_low[cinfo$stock==stock])
intbio_hi <- as.numeric(cinfo$intbio_hi[cinfo$stock==stock])
endbio_low <- as.numeric(cinfo$endbio_low[cinfo$stock==stock])
endbio_hi <- as.numeric(cinfo$endbio_hi[cinfo$stock==stock])
Btype <- as.character(cinfo$Btype[cinfo$stock==stock])
FutureCrash <- as.character(cinfo$FutureCrash[cinfo$stock==stock])
comment <- as.character(cinfo$comment[cinfo$stock==stock])
# extract data on stock
yr <- as.numeric(cdat$yr[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])
ct <- as.numeric(cdat$ct[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that catch is given in tonnes, transforms to '000 tonnes
if(Btype=="observed" | Btype=="CPUE" | Btype=="simulated") {
bt <- as.numeric(cdat$TB[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that biomass is in tonnes, transforms to '000 tonnes
} else {bt <- NA}
nyr <- length(yr) # number of years in the time series
if(Btype!="observed") {bio <- bt}
# change biomass to moving average as assumed by Schaefer (but not for simulations or CPUE)
# for last year use reported bio
if(Btype=="observed") {
ma <- function(x){filter(x,rep(1/2,2),sides=2)}
bio <- ma(bt)
bio[length(bio)] <- bt[length(bt)] }
# initialize vectors for viable r, k, bt
rv.all <- vector()
kv.all <- vector()
btv.all <- matrix(data=vector(),ncol=nyr+1)
#----------------------------------------------------
# Determine initial ranges for parameters and biomass
#----------------------------------------------------
# initial range of r from input file
if(is.na(r_low)==F & is.na(r_hi)==F) {
start_r <- c(r_low,r_hi)
} else {
# initial range of r and CatchMult values based on resilience
if(res == "High") {
start_r <- c(0.6,1.5)} else if(res == "Medium") {
start_r <- c(0.2,0.8)} else if(res == "Low") {
start_r <- c(0.05,0.5)} else { # i.e. res== "Very low"
start_r <- c(0.015,0.1)}
}
# initial range of k values, assuming k will always be larger than max catch
# and max catch will never be smaller than a quarter of MSY
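# (since MSY = r*k/4, max(ct) >= MSY/4 implies MSY <= 4*max(ct), hence k = 4*MSY/r <= 16*max(ct)/start_r[1])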
start_k <- c(max(ct),16*max(ct)/start_r[1])
# initial biomass range from input file
if(is.na(stb_low)==F & is.na(stb_hi)==F) {
startbio <- c(stb_low,stb_hi)
} else {
# use low biomass at start as default
startbio <- c(0.1,0.5)
}
MinYear <- yr[which.min(ct)]
MaxYear <- yr[which.max(ct)]
# use year and biomass range for intermediate biomass from input file
if(is.na(intbio_low)==F & is.na(intbio_hi)==F) {
intyr <- intyr
intbio <- c(intbio_low,intbio_hi)
# else if year of minimum catch is at least 3 years away from StartYear and EndYear of series, use min catch
} else if((MinYear - StartYear) > 3 & (EndYear - MinYear) > 3 ) {
# assume that biomass range in year before minimum catch was 0.01 - 0.4
intyr <- MinYear-1
intbio <- c(0.01,0.4)
# else if year of max catch is at least 3 years away from StartYear and EndYear of series, use max catch
} else if((MaxYear - StartYear) > 3 & (EndYear - MaxYear) > 3 ) {
# assume that biomass range in year before maximum catch was 0.3 - 0.9
intyr <- MaxYear-1
intbio <- c(0.3,0.9)
} else {
# assume uninformative range 0-1 in mid-year
intyr <- as.integer(mean(c(StartYear, EndYear)))
intbio <- c(0,1) }
# end of intbio setting
# final biomass range from input file
if(is.na(endbio_low)==F & is.na(endbio_hi)==F) {
endbio <- c(endbio_low,endbio_hi)
} else {
# else use Catch/maxCatch to estimate final biomass
endbio <- if(ct[nyr]/max(ct) > 0.5) {c(0.4,0.8)} else {c(0.01,0.4)}
} # end of final biomass setting
#----------------------------------------------
# MC with Schaefer Function filtering
#----------------------------------------------
Schaefer <- function(ri, ki, startbio, intyr, intbio, endbio, sigR, pt) {
# if stock is not expected to crash within 3 years if last catch continues
if(FutureCrash == "No") {
yr.s <- c(yr,EndYear+1,EndYear+2,EndYear+3)
ct.s <- c(ct,ct[yr==EndYear],ct[yr==EndYear],ct[yr==EndYear])
nyr.s <- length(yr.s)
} else{
yr.s <- yr
ct.s <- ct
nyr.s <- nyr
}
# create vector for initial biomasses
startbt <-seq(from =startbio[1], to=startbio[2], by = (startbio[2]-startbio[1])/10)
# create vectors for viable r, k and bt
rv <- array(-1:-1,dim=c(length(ri)*length(startbt))) #initialize array with -1. The -1 remaining after the process will be removed
kv <- array(-1:-1,dim=c(length(ri)*length(startbt)))
btv <- matrix(data=NA, nrow = (length(ri)*length(startbt)), ncol = nyr+1)
intyr.i <- which(yr.s==intyr) # get index of intermediate year
#loop through r-k pairs
npoints = length(ri)
nstartb = length(startbt)
for(i in 1 : npoints) {
if (i%%1000==0)
cat(".")
# create empty vector for annual biomasses
bt <- vector()
# loop through range of relative start biomasses
for(j in startbt) {
# set initial biomass, including process error
bt[1]=j*ki[i]*exp(rnorm(1,0, sigR)) ## set biomass in first year
#loop through years in catch time series
for(t in 1:nyr.s) { # for all years in the time series
xt=rnorm(1,0, sigR) # set new random process error for every year
# calculate biomass as function of previous year's biomass plus surplus production minus catch
bt[t+1]=(bt[t]+ri[i]*bt[t]*(1-bt[t]/ki[i])-ct.s[t])*exp(xt)
# if biomass < 0.01 k or > 1.1 k, discard r-k pair
if(bt[t+1] < 0.01*ki[i] || bt[t+1] > 1.1*ki[i]) { break } # stop looping through years, go to next upper level
if ((t+1)==intyr.i && (bt[t+1]>(intbio[2]*ki[i]) || bt[t+1]<(intbio[1]*ki[i]))) { break } #intermediate year check
} # end of loop of years
# if last biomass falls outside the expected range, go to next r-k pair
if(t < nyr.s || bt[yr.s==EndYear] > (endbio[2]*ki[i]) || bt[yr.s==EndYear] < (endbio[1]*ki[i])) {
next } else {
# store r, k, and bt, plot point, then go to next startbt
rv[((i-1)*nstartb)+j] <- ri[i]
kv[((i-1)*nstartb)+j] <- ki[i]
btv[((i-1)*nstartb)+j,] <- bt[1:(nyr+1)]/ki[i] #substitute a row into the matrix, exclude FutureCrash years
if(pt==T) {points(x=ri[i], y=ki[i], pch=".", cex=2, col="black")
next }
}
} # end of loop of initial biomasses
} # end of loop of r-k pairs
rv=rv[rv!=-1]
kv=kv[kv!=-1]
btv=na.omit(btv) # remove rows that were never filled (all NA)
cat("\n")
return(list(rv, kv,btv))
} # end of Schaefer function
#------------------------------------------------------------------
# Uniform sampling of the r-k space
#------------------------------------------------------------------
# get random set of r and k from log space distribution
ri1 = exp(runif(n, log(start_r[1]), log(start_r[2])))
ki1 = exp(runif(n, log(start_k[1]), log(start_k[2])))
#-----------------------------------------------------------------
# Plot data and progress
#-----------------------------------------------------------------
#windows(14,9)
par(mfcol=c(2,3))
# plot catch
plot(x=yr, y=ct, ylim=c(0,1.2*max(ct)), type ="l", bty="l", main=paste(stock,"catch"), xlab="Year",
ylab="Catch", lwd=2)
points(x=yr[which.max(ct)], y=max(ct), col="red", lwd=2)
points(x=yr[which.min(ct)], y=min(ct), col="red", lwd=2)
# plot r-k graph
plot(ri1, ki1, xlim = start_r, ylim = start_k, log="xy", xlab="r", ylab="k", main="Finding viable r-k", pch=".", cex=2, bty="l", col="lightgrey")
#1 - Call MC-Schaefer function for a preliminary exploration of the space without prior information
cat(stock, ": First Monte Carlo filtering of r-k space with ",n," points\n")
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
rv.all <- append(rv.all,MCA[[1]])
kv.all <- append(kv.all,MCA[[2]])
btv.all <- rbind(btv.all,MCA[[3]])
#take viable r and k values
nviablepoints = length(rv.all)
cat("* Found ",nviablepoints," viable points from ",n," samples\n");
#if few points were found then resample and shrink the k log space
if (nviablepoints<=1000){
log.start_k.new <- log(start_k)
max_attempts = 3
current_attempts = 1
while (nviablepoints<=1000 && current_attempts<=max_attempts){
if(nviablepoints > 0) {
log.start_k.new[1] <- mean(c(log.start_k.new[1], min(log(kv.all))))
log.start_k.new[2] <- mean(c(log.start_k.new[2], max(log(kv.all)))) }
n.new=n*current_attempts #add more points
ri1 = exp(runif(n.new, log(start_r[1]), log(start_r[2])))
ki1 = exp(runif(n.new, log.start_k.new[1], log.start_k.new[2]))
cat("Shrinking k space: repeating Monte Carlo in the interval [",exp(log.start_k.new[1]),",",exp(log.start_k.new[2]),"]\n")
cat("Attempt ",current_attempts," of ",max_attempts," with ",n.new," points","\n")
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
rv.all <- append(rv.all,MCA[[1]])
kv.all <- append(kv.all,MCA[[2]])
btv.all <- rbind(btv.all,MCA[[3]])
nviablepoints = length(rv.all) #recalculate viable points
cat("* Found altogether",nviablepoints," viable points \n");
current_attempts=current_attempts+1 #increment the number of attempts
}
}
# If tip of viable r-k pairs is 'thin', do extra sampling there
gm.rv = exp(mean(log(rv.all)))
if(length(rv.all[rv.all > 0.9*start_r[2]]) < 10) {
l.sample.r <- (gm.rv + max(rv.all))/2
cat("Final sampling in the tip area above r =",l.sample.r,"\n")
log.start_k.new <- c(log(0.8*min(kv.all)),log(max(kv.all[rv.all > l.sample.r])))
ri1 = exp(runif(50000, log(l.sample.r), log(start_r[2])))
ki1 = exp(runif(50000, log.start_k.new[1], log.start_k.new[2]))
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
rv.all <- append(rv.all,MCA[[1]])
kv.all <- append(kv.all,MCA[[2]])
btv.all <- rbind(btv.all,MCA[[3]])
nviablepoints = length(rv.all) #recalculate viable points
cat("Found altogether", length(rv.all), "unique viable r-k pairs and biomass trajectories\n")
}
# ------------------------------------------------------------
# Bayesian analysis of catch & biomass with Schaefer model
# ------------------------------------------------------------
if(Btype == "observed" | Btype=="simulated") {
cat("Running Schaefer MCMC analysis....\n")
mcmc.burn <- as.integer(30000)
mcmc.chainLength <- as.integer(60000) # burn-in plus post-burn
mcmc.thin = 10 # to reduce autocorrelation
mcmc.chains = 3 # needs to be at least 2 for DIC
# Parameters to be returned by JAGS
jags.save.params=c('r','k','sigma.b', 'alpha', 'sigma.r') #
# JAGS model
Model = "model{
# to avoid crash due to 0 values
eps<-0.01
# set a quite narrow variation from the expected value
sigma.b <- 1/16
tau.b <- pow(sigma.b,-2)
Bm[1] <- log(alpha*k)
bio[1] ~ dlnorm(Bm[1],tau.b)
for (t in 2:nyr){
bio[t] ~ dlnorm(Bm[t],tau.b)
Bm[t] <- log(max(bio[t-1] + r*bio[t-1]*(1 - (bio[t-1])/k) - ct[t-1], eps))
}
# priors
alpha ~ dunif(0.01,1) # needed for fit of first biomass
#inverse cubic root relationship between the range of viable r and the size of the search space
inverseRangeFactor <- 1/((start_r[2]-start_r[1])^(1/3))
# give sigma some variability in the inverse relationship
sigma.r ~ dunif(0.001*inverseRangeFactor,0.02*inverseRangeFactor)
tau.r <- pow(sigma.r,-2)
rm <- log((start_r[1]+start_r[2])/2)
r ~ dlnorm(rm,tau.r)
# search in the k space from the center of the range. Allow high variability
km <- log((start_k[1]+start_k[2])/2)
tau.k <- pow(km,-2)
k ~ dlnorm(km,tau.k)
#end model
}"
# Write JAGS model to file
cat(Model, file="r2jags.bug")
### random seed
set.seed(runif(1,1,500)) # needed in JAGS
### run model
jags_outputs <- jags(data=c('ct','bio','nyr', 'start_r', 'start_k'),
working.directory=NULL, inits=NULL,
parameters.to.save= jags.save.params,
model.file="r2jags.bug", n.chains = mcmc.chains,
n.burnin = mcmc.burn, n.thin = mcmc.thin, n.iter = mcmc.chainLength,
refresh=mcmc.burn/20)
# ------------------------------------------------------
# Results from JAGS Schaefer
# ------------------------------------------------------
r_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$r))
k_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$k))
## sigma_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$sigma.b))
alpha_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$alpha))
## sigma.r_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$sigma.r))
mean.log.r.jags <- mean(log(r_out))
SD.log.r.jags <- sd(log(r_out))
lcl.log.r.jags <- mean.log.r.jags-1.96*SD.log.r.jags
ucl.log.r.jags <- mean.log.r.jags+1.96*SD.log.r.jags
gm.r.jags <- exp(mean.log.r.jags)
lcl.r.jags <- exp(lcl.log.r.jags)
ucl.r.jags <- exp(ucl.log.r.jags)
mean.log.k.jags <- mean(log(k_out))
SD.log.k.jags <- sd(log(k_out))
lcl.log.k.jags <- mean.log.k.jags-1.96*SD.log.k.jags
ucl.log.k.jags <- mean.log.k.jags+1.96*SD.log.k.jags
gm.k.jags <- exp(mean.log.k.jags)
lcl.k.jags <- exp(lcl.log.k.jags)
ucl.k.jags <- exp(ucl.log.k.jags)
mean.log.MSY.jags<- mean(log(r_out)+log(k_out)-log(4))
SD.log.MSY.jags <- sd(log(r_out)+log(k_out)-log(4))
gm.MSY.jags <- exp(mean.log.MSY.jags)
lcl.MSY.jags <- exp(mean.log.MSY.jags-1.96*SD.log.MSY.jags)
ucl.MSY.jags <- exp(mean.log.MSY.jags+1.96*SD.log.MSY.jags)
} # end of MCMC Schaefer loop
#------------------------------------
# get results from CMSY
#------------------------------------
# get estimate of most probable r as median of mid log.r-classes above cut-off
# get remaining viable log.r and log.k
rem.log.r <- log(rv.all[rv.all > gm.rv])
rem.log.k <- log(kv.all[rv.all>gm.rv])
# get vectors with numbers of r and mid values in about 25 classes
hist.log.r <- hist(x=rem.log.r, breaks=25, plot=F)
log.r.counts <- hist.log.r$counts
log.r.mids <- hist.log.r$mids
# get most probable log.r as median of mids with counts > 0
log.r.est <- median(log.r.mids[which(log.r.counts > 0)])
lcl.log.r <- as.numeric(quantile(x=log.r.mids[which(log.r.counts > 0)], 0.025))
ucl.log.r <- as.numeric(quantile(x=log.r.mids[which(log.r.counts > 0)], 0.975))
r.est <- exp(log.r.est)
lcl.r.est <- exp(lcl.log.r)
ucl.r.est <- exp(ucl.log.r)
# do linear regression of log k ~ log r with slope fixed to -1 (from Schaefer)
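# (rationale: MSY = r*k/4 is roughly constant across viable pairs, so log(k) = log(4*MSY) - log(r), i.e. slope -1)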
reg <- lm(rem.log.k ~ 1 + offset(-1*rem.log.r))
int.reg <- as.numeric(reg[1])
sd.reg <- sd(resid(reg))
se.reg <- summary(reg)$coefficients[2]
# get estimate of log(k) from y where x = log.r.est
log.k.est <- int.reg + (-1) * log.r.est
# get estimates of CL of log.k.est from y +/- SD where x = lcl.log.r or ucl.log.r
lcl.log.k <- int.reg + (-1) * ucl.log.r - sd.reg
ucl.log.k <- int.reg + (-1) * lcl.log.r + sd.reg
k.est <- exp(log.k.est)
lcl.k.est <- exp(lcl.log.k)
ucl.k.est <- exp(ucl.log.k)
# get MSY from remaining log r-k pairs
log.MSY.est <- mean(rem.log.r + rem.log.k - log(4))
sd.log.MSY.est <- sd(rem.log.r + rem.log.k - log(4))
lcl.log.MSY.est <- log.MSY.est - 1.96*sd.log.MSY.est
ucl.log.MSY.est <- log.MSY.est + 1.96*sd.log.MSY.est
MSY.est <- exp(log.MSY.est)
lcl.MSY.est <- exp(lcl.log.MSY.est)
ucl.MSY.est <- exp(ucl.log.MSY.est)
# get predicted biomass vectors as median and quantiles of trajectories
median.btv <- apply(btv.all,2, median)
lastyr.bio <- median.btv[length(median.btv)-1]
nextyr.bio <- median.btv[length(median.btv)]
lcl.btv <- apply(btv.all,2, quantile, probs=0.025)
q.btv <- apply(btv.all,2, quantile, probs=0.25)
ucl.btv <- apply(btv.all,2, quantile, probs=0.975)
lcl.lastyr.bio <- lcl.btv[length(lcl.btv)-1]
ucl.lastyr.bio <- ucl.btv[length(lcl.btv)-1]
lcl.nextyr.bio <- lcl.btv[length(lcl.btv)]
ucl.nextyr.bio <- ucl.btv[length(lcl.btv)]
# -----------------------------------------
# Plot results
# -----------------------------------------
# Analysis of viable r-k pairs
plot(x=rv.all, y=kv.all, xlim=start_r,
ylim=c(0.9*min(kv.all, ifelse(Btype == "observed",k_out,NA), na.rm=T), 1.1*max(kv.all)),
pch=16, col="grey",log="xy", bty="l",
xlab="r", ylab="k", main="Analysis of viable r-k")
abline(v=gm.rv, lty="dashed")
# plot points and best estimate from full Schaefer analysis
if(Btype == "observed"|Btype=="simulated") {
# plot r-k pairs from MCMC
points(x=r_out, y=k_out, pch=16,cex=0.5)
# plot best r-k pair from MCMC
points(x=gm.r.jags, y=gm.k.jags, pch=19, col="green")
lines(x=c(lcl.r.jags, ucl.r.jags),y=c(gm.k.jags,gm.k.jags), col="green")
lines(x=c(gm.r.jags,gm.r.jags),y=c(lcl.k.jags, ucl.k.jags), col="green")
}
# if data are from simulation, plot true r and k
if(Btype=="simulated") {
l.stock <- nchar(stock) # get length of sim stock name
r.char <- substr(stock,l.stock-1,l.stock) # get last character of sim stock name
r.sim <- NA # initialize vector for r used in simulation
if(r.char=="_H") {r.sim=1; lcl.r.sim=0.8; ucl.r.sim=1.25} else
if(r.char=="_M") {r.sim=0.5;lcl.r.sim=0.4;ucl.r.sim=0.62} else
if(r.char=="_L") {r.sim=0.25;lcl.r.sim=0.2;ucl.r.sim=0.31} else {r.sim=0.05;lcl.r.sim=0.04;ucl.r.sim=0.062}
# plot true r-k point with error bars
points(x=r.sim, y=1000, pch=19, col="red")
# add +/- 20% error bars
lines(x=c(lcl.r.sim,ucl.r.sim), y=c(1000,1000), col="red")
lines(x=c(r.sim,r.sim), y=c(800,1250), col="red")
}
# plot blue dot for proposed r-k, with 95% CL lines
points(x=r.est, y=k.est, pch=19, col="blue")
lines(x=c(lcl.r.est, ucl.r.est),y=c(k.est,k.est), col="blue")
lines(x=c(r.est,r.est),y=c(lcl.k.est, ucl.k.est), col="blue")
# plot biomass graph
# determine k to use for red line in b/k plot
if(Btype=="simulated") {k2use <- 1000} else
if(Btype == "observed") {k2use <- gm.k.jags} else {k2use <- k.est}
# determine height of y-axis in plot
max.y <- max(c(bio/k2use,ucl.btv,0.6,startbio[2], intbio[2],endbio[2]),na.rm=T)
plot(x=yr,y=median.btv[1:nyr], lwd=2, xlab="Year", ylab="Relative biomass b/k", type="l",
ylim=c(0,max.y), bty="l", main=paste("Pred. biomass vs ", Btype,sep=""))
lines(x=yr, y=lcl.btv[1:nyr],type="l")
lines(x=yr, y=ucl.btv[1:nyr],type="l")
points(x=EndYear,y=q.btv[yr==EndYear], col="purple", cex=1.5, lwd=2)
abline(h=0.5, lty="dashed")
abline(h=0.25, lty="dotted")
lines(x=c(yr[1],yr[1]), y=startbio, col="blue")
lines(x=c(intyr,intyr), y=intbio, col="blue")
lines(x=c(max(yr),max(yr)), y=endbio, col="blue")
# if observed biomass is available, plot red biomass line
if(Btype == "observed"|Btype=="simulated") {
lines(x=yr, y=bio/k2use,type="l", col="red", lwd=1)
}
# if CPUE data are available, scale to predicted biomass range, plot red biomass line
if(Btype == "CPUE") {
par(new=T) # prepares for new plot on top of previous
plot(x=yr, y=bio, type="l", col="red", lwd=1,
ann=F,axes=F,ylim=c(0,1.2*max(bio, na.rm=T))) # forces this plot on top of previous one
axis(4, col="red", col.axis="red")
}
# plot yield and biomass against equilibrium surplus parabola
max.y <-max(c(ct/MSY.est,ifelse(Btype=="observed"|Btype=="simulated",ct/gm.MSY.jags,NA),1.2),na.rm=T)
# plot parabola
x=seq(from=0,to=2,by=0.001)
y=4*x-(2*x)^2
plot(x=x, y=y, xlim=c(0,1), ylim=c(0,max.y), type="l", bty="l",xlab="Relative biomass b/k",
ylab="Catch / MSY", main="Equilibrium curve")
# plot catch against CMSY biomass estimates
points(x=median.btv[1:nyr], y=ct/MSY.est, pch=16, col="grey")
points(x=q.btv[yr==EndYear],y=ct[yr==EndYear]/MSY.est, col="purple", cex=1.5, lwd=2)
# plot catch against observed biomass or CPUE
if(Btype == "observed"|Btype=="simulated") {
points(x=bio/k2use, y=ct/gm.MSY.jags, pch=16, cex=0.5)
}
# plot exploitation rate u against u.msy
# get u derived from predicted CMSY biomass
u.CMSY <- ct/(median.btv[1:nyr]*k.est)
u.msy.CMSY <- 1-exp(-r.est/2) # Fmsy from CMSY expressed as exploitation rate
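# (Fmsy = r/2 for the Schaefer model; 1-exp(-Fmsy) converts the instantaneous rate into an annual exploitation fraction)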
# get u from observed or simulated biomass
if(Btype == "observed"|Btype=="simulated") {
u.bio <- ct/bio
u.msy.bio <- 1-exp(-gm.r.jags/2)
}
# get u from CPUE
if(Btype == "CPUE") {
q=max(median.btv[1:nyr][is.na(bio)==F],na.rm=T)*k.est/max(bio,na.rm=T)
u.CPUE <- ct/(q*bio)
}
# determine upper bound of Y-axis
max.y <- max(c(1.5, 1.2*u.CMSY/u.msy.CMSY,ct[yr==EndYear]/(q.btv[yr==EndYear]*k.est)/u.msy.CMSY,
ifelse(Btype=="observed"|Btype=="simulated",max(u.bio[is.na(u.bio)==F]/u.msy.bio),0),
na.rm=T))
# plot u from CMSY
plot(x=yr,y=u.CMSY/u.msy.CMSY, type="l", bty="l", ylim=c(0,max.y), xlab="Year",
ylab="u / u_msy", main="Exploitation rate")
abline(h=1, lty="dashed")
points(x=EndYear,y=ct[yr==EndYear]/(q.btv[yr==EndYear]*k.est)/u.msy.CMSY, col="purple", cex=1.5, lwd=2)
# plot u from biomass
if(Btype == "observed"|Btype=="simulated") lines(x=yr, y=u.bio/u.msy.bio, col="red")
# plot u from CPUE
if(Btype == "CPUE") {
par(new=T) # prepares for new plot on top of previous
plot(x=yr, y=u.CPUE, type="l", col="red", ylim=c(0, 1.2*max(u.CPUE,na.rm=T)),ann=F,axes=F)
axis(4, col="red", col.axis="red")
}
if(batch.mode == TRUE) {dev.off()} # close plot window
# ------------------------------------------
# print input and results to screen
cat("---------------------------------------\n")
cat("Species:", cinfo$ScientificName[cinfo$stock==stock], "\n")
cat("Name and region:", cinfo$EnglishName[cinfo$stock==stock], ",", cinfo$Name[cinfo$stock==stock], "\n")
cat("Stock:",stock,"\n")
cat("Catch data used from years", min(yr),"-", max(yr), "\n")
cat("Prior initial relative biomass =", startbio[1], "-", startbio[2], "\n")
cat("Prior intermediate rel. biomass=", intbio[1], "-", intbio[2], "in year", intyr, "\n")
cat("Prior final relative biomass =", endbio[1], "-", endbio[2], "\n")
cat("If current catches continue, is the stock likely to crash within 3 years?",FutureCrash,"\n")
cat("Prior range for r =", format(start_r[1],digits=2), "-", format(start_r[2],digits=2),
", prior range for k =", start_k[1], "-", start_k[2],"\n")
# if data are simulated, print true r-k
if(filename_1=="SimCatch.csv") {
cat("True r =", r.sim, "(because input data were simulated with Schaefer model)\n")
cat("True k = 1000 \n")
cat("True MSY =", 1000*r.sim/4,"\n")
cat("True biomass in last year =",bio[length(bio)],"or",bio[length(bio)]/1000,"k \n")
cat("True mean catch / MSY ratio =", mean(ct)/(1000*r.sim/4),"\n")
}
# print results from full Schaefer if available
if(Btype == "observed"|Btype=="simulated") {
cat("Results from Bayesian Schaefer model using catch & biomass (",Btype,")\n")
cat("MSY =", gm.MSY.jags,", 95% CL =", lcl.MSY.jags, "-", ucl.MSY.jags,"\n")
cat("Mean catch / MSY =", mean(ct)/gm.MSY.jags,"\n")
if(Btype != "CPUE") {
cat("r =", gm.r.jags,", 95% CL =", lcl.r.jags, "-", ucl.r.jags,"\n")
cat("k =", gm.k.jags,", 95% CL =", lcl.k.jags, "-", ucl.k.jags,"\n")
}
}
# results of CMSY analysis
cat("Results of CMSY analysis \n")
cat("Altogether", nviablepoints,"unique viable r-k pairs were found \n")
cat(nviablepoints-length(rem.log.r),"r-k pairs above the initial geometric mean of r =", gm.rv, "were analysed\n")
cat("r =", r.est,", 95% CL =", lcl.r.est, "-", ucl.r.est,"\n")
cat("k =", k.est,", 95% CL =", lcl.k.est, "-", ucl.k.est,"\n")
cat("MSY =", MSY.est,", 95% CL =", lcl.MSY.est, "-", ucl.MSY.est,"\n")
cat("Predicted biomass in last year =", lastyr.bio, "2.5th perc =", lcl.lastyr.bio,
"97.5th perc =", ucl.lastyr.bio,"\n")
cat("Predicted biomass in next year =", nextyr.bio, "2.5th perc =", lcl.nextyr.bio,
"97.5th perc =", ucl.nextyr.bio,"\n")
cat("----------------------------------------------------------\n")
## Write some results into outfile
if(write.output == TRUE) {
# write data into csv file
output = data.frame(cinfo$ScientificName[cinfo$stock==stock], stock, StartYear, EndYear, mean(ct)*1000,
ifelse(Btype=="observed"|Btype=="simulate",bio[length(bio)],NA), # last biomass on record
ifelse(Btype == "observed"|Btype=="simulated",gm.MSY.jags,NA), # full Schaefer
ifelse(Btype == "observed"|Btype=="simulated",lcl.MSY.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",ucl.MSY.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",gm.r.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",lcl.r.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",ucl.r.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",gm.k.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",lcl.k.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",ucl.k.jags,NA),
r.est, lcl.r.est, ucl.r.est, # CMSY r
k.est, lcl.k.est, ucl.k.est, # CMSY k
MSY.est, lcl.MSY.est, ucl.MSY.est, # CMSY MSY
lastyr.bio, lcl.lastyr.bio, ucl.lastyr.bio, # last year bio
nextyr.bio, lcl.nextyr.bio, ucl.nextyr.bio)# last year + 1 bio
write.table(output, file=outfile, append = T, sep = ",",
dec = ".", row.names = FALSE, col.names = FALSE)
# write some text into text outfile.txt
cat("Species:", cinfo$ScientificName[cinfo$stock==stock], "\n",
"Name:", cinfo$EnglishName[cinfo$stock==stock], "\n",
"Region:", cinfo$Name[cinfo$stock==stock], "\n",
"Stock:",stock,"\n",
"Catch data used from years", min(yr),"-", max(yr),", biomass =", Btype, "\n",
"Prior initial relative biomass =", startbio[1], "-", startbio[2], "\n",
"Prior intermediate rel. biomass=", intbio[1], "-", intbio[2], "in year", intyr, "\n",
"Prior final relative biomass =", endbio[1], "-", endbio[2], "\n",
"Future crash with current catches?", FutureCrash, "\n",
"Prior range for r =", format(start_r[1],digits=2), "-", format(start_r[2],digits=2),
", prior range for k =", start_k[1], "-", start_k[2],"\n",
file=outfile.txt,append=T)
if(filename_1=="SimCatch.csv") {
cat(" True r =", r.sim, "(because input data were simulated with Schaefer model)\n",
"True k = 1000, true MSY =", 1000*r.sim/4,"\n",
"True biomass in last year =",bio[length(bio)],"or",bio[length(bio)]/1000,"k \n",
"True mean catch / MSY ratio =", mean(ct)/(1000*r.sim/4),"\n",
file=outfile.txt,append=T)
}
if(Btype == "observed"|Btype=="simulated") {
cat(" Results from Bayesian Schaefer model using catch & biomass \n",
"r =", gm.r.jags,", 95% CL =", lcl.r.jags, "-", ucl.r.jags,"\n",
"k =", gm.k.jags,", 95% CL =", lcl.k.jags, "-", ucl.k.jags,"\n",
"MSY =", gm.MSY.jags,", 95% CL =", lcl.MSY.jags, "-", ucl.MSY.jags,"\n",
"Mean catch / MSY =", mean(ct)/gm.MSY.jags,"\n",
file=outfile.txt,append=T)
}
cat(" Results of CMSY analysis with altogether", nviablepoints,"unique viable r-k pairs \n",
nviablepoints-length(rem.log.r),"r-k pairs above the initial geometric mean of r =", gm.rv, "were analysed\n",
"r =", r.est,", 95% CL =", lcl.r.est, "-", ucl.r.est,"\n",
"k =", k.est,", 95% CL =", lcl.k.est, "-", ucl.k.est,"\n",
"MSY =", MSY.est,", 95% CL =", lcl.MSY.est, "-", ucl.MSY.est,"\n",
"Predicted biomass last year b/k =", lastyr.bio, "2.5th perc b/k =", lcl.lastyr.bio,
"97.5th perc b/k =", ucl.lastyr.bio,"\n",
"Precautionary 25th percentile b/k =",q.btv[yr==EndYear],"\n",
"----------------------------------------------------------\n",
file=outfile.txt,append=T)
}
} # end of stocks loop
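For reference, the core surplus-production update that the Schaefer() filter above applies to every candidate r-k pair can be run in isolation. The sketch below is illustrative only: r.demo, k.demo, the constant catch series and the 0.6 relative start biomass are made-up values, not taken from the input files used by the script.

# illustrative single Schaefer projection with lognormal process error
sigR.demo <- 0.02                      # process error, same value as sigR above
r.demo <- 0.5                          # assumed intrinsic growth rate
k.demo <- 1000                         # assumed carrying capacity ('000 tonnes)
ct.demo <- rep(100, 20)                # assumed constant annual catch
b.demo <- numeric(length(ct.demo) + 1)
b.demo[1] <- 0.6 * k.demo              # assumed relative start biomass b/k = 0.6
for (t in seq_along(ct.demo)) {
  xt <- rnorm(1, 0, sigR.demo)         # new random process error every year
  b.demo[t + 1] <- (b.demo[t] + r.demo * b.demo[t] * (1 - b.demo[t] / k.demo) -
                    ct.demo[t]) * exp(xt)
}
round(b.demo / k.demo, 2)              # relative biomass trajectory b/k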

@@ -0,0 +1,119 @@
##--------------------------------------------------------
## CMSY analysis with estimation of total biomass, including Bayesian Schaefer
## written by Rainer Froese with support from Gianpaolo Coro in 2013-2014
## This version adjusts biomass to average biomass over the year
## It also contains the FutureCrash option to improve prediction of final biomass
## Version 21 adds the purple point to indicate the 25th percentile of final biomass
## Version 22 accepts that no biomass or CPUE data are available
##--------------------------------------------------------
library(R2jags) # Interface with JAGS
library(coda)
#-----------------------------------------
# Some general settings
#-----------------------------------------
# set.seed(999) # use for comparing results between runs
rm(list=ls(all=TRUE)) # clear previous variables etc
options(digits=3) # displays all numbers with three significant digits as default
graphics.off() # close graphics windows from previous sessions
#-----------------------------------------
# General settings for the analysis
#-----------------------------------------
sigR <- 0.02 # overall process error; 0.05 works reasonably well for simulations, 0.02 for real data; 0 if deterministic model
n <- 10000 # initial number of r-k pairs
batch.mode <- T # set to TRUE to suppress graphs
write.output <- T # set to true if table of output is wanted
FutureCrash <- "No"
#-----------------------------------------
# Start output to screen
#-----------------------------------------
cat("-------------------------------------------\n")
cat("Catch-MSY Analysis,", date(),"\n")
cat("-------------------------------------------\n")
#------------------------------------------
# Read data and assign to vectors
#------------------------------------------
# filename_1 <- "AllStocks_Catch4.csv"
# filename_2 <- "AllStocks_ID4.csv"
# filename_1 <- "SimCatch.csv"
# filename_2 <- "SimSpec.csv"
# filename_2 <- "SimSpecWrongS.csv"
# filename_2 <- "SimSpecWrongI.csv"
# filename_2 <- "SimSpecWrongF.csv"
# filename_2 <- "SimSpecWrongH.csv"
# filename_2 <- "SimSpecWrongL.csv"
# filename_1 <- "FishDataLim.csv"
# filename_2 <- "FishDataLimSpec.csv"
filename_1 <- "WKLIFE4Stocks.csv"
filename_2 <- "WKLIFE4ID.csv"
outfile<-"outfile"
outfile.txt <- "outputfile.txt"
cdat <- read.csv(filename_1, header=T, dec=".", stringsAsFactors = FALSE)
cinfo <- read.csv(filename_2, header=T, dec=".", stringsAsFactors = FALSE)
cat("Files", filename_1, ",", filename_2, "read successfully","\n")
# Stocks with total biomass data and catch data from StartYear to EndYear
# stocks <- sort(as.character(cinfo$stock)) # All stocks
stocks<-"HLH_M07"
# select one stock after the other
for(stock in stocks) {
# assign data from cinfo to vectors
res <- as.character(cinfo$Resilience[cinfo$stock==stock])
StartYear <- as.numeric(cinfo$StartYear[cinfo$stock==stock])
EndYear <- as.numeric(cinfo$EndYear[cinfo$stock==stock])
r_low <- as.numeric(cinfo$r_low[cinfo$stock==stock])
r_hi <- as.numeric(cinfo$r_hi[cinfo$stock==stock])
stb_low <- as.numeric(cinfo$stb_low[cinfo$stock==stock])
stb_hi <- as.numeric(cinfo$stb_hi[cinfo$stock==stock])
intyr <- as.numeric(cinfo$intyr[cinfo$stock==stock])
intbio_low <- as.numeric(cinfo$intbio_low[cinfo$stock==stock])
intbio_hi <- as.numeric(cinfo$intbio_hi[cinfo$stock==stock])
endbio_low <- as.numeric(cinfo$endbio_low[cinfo$stock==stock])
endbio_hi <- as.numeric(cinfo$endbio_hi[cinfo$stock==stock])
Btype <- as.character(cinfo$Btype[cinfo$stock==stock])
FutureCrash <- as.character(cinfo$FutureCrash[cinfo$stock==stock])
comment <- as.character(cinfo$comment[cinfo$stock==stock])
# extract data on stock
yr <- as.numeric(cdat$yr[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])
ct <- as.numeric(cdat$ct[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that catch is given in tonnes, transforms to '000 tonnes
if(Btype=="observed" | Btype=="CPUE" | Btype=="simulated") {
bt <- as.numeric(cdat$TB[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that biomass is in tonnes, transforms to '000 tonnes
} else {bt <- NA}
nyr <- length(yr) # number of years in the time series
cat("->---------------------------------------
Species: NA
Name and region: NA , NA
Stock: HLH_M07
Catch data used from years 1 - 50
Prior initial relative biomass = 0.5 - 0.9
Prior intermediate rel. biomass= 0.01 - 0.4 in year 25
Prior final relative biomass = 0.4 - 0.8
If current catches continue, is the stock likely to crash within 3 years? No
Prior range for r = 0.2 - 0.8 , prior range for k = 125 - 9965
Results from Bayesian Schaefer model using catch & biomass ( simulated )
MSY = 91.7 , 95% CL = 83.9 - 100
Mean catch / MSY = 0.882
r = 0.425 , 95% CL = 0.374 - 0.483
k = 863 , 95% CL = 783 - 951
Results of CMSY analysis
Altogether 2055 unique viable r-k pairs were found
1142 r-k pairs above the initial geometric mean of r = 0.343 were analysed
r = 0.522 , 95% CL = 0.349 - 0.782
k = 683 , 95% CL = 438 - 1067
MSY = 89.2 , 95% CL = 82.2 - 96.7
Predicted biomass in last year = 0.676 2.5th perc = 0.435 97.5th perc = 0.768
Predicted biomass in next year = 0.673 2.5th perc = 0.433 97.5th perc = 0.758
----------------------------------------------------------
",file=outfile.txt,append=T)
}

@@ -0,0 +1,17 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">1</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

@@ -0,0 +1,530 @@
#### R and JAGS code for estimating LWR-parameters from previous studies
#### Meant for updating the ESTIMATE table in FishBase
#### Created by Rainer Froese in March 2013, including JAGS models by James Thorston
#### Modified in June 2013 to include subfamilies
rm(list=ls(all=TRUE)) # remove previous variables and data
options(digits=3) # 3 significant digits as default
library(R2jags) # Interface with JAGS
runif(1) # sets random seed
#### Read in data
DataFile = "RF_LWR2.csv" # RF_LWR4 was extracted from FishBase in June 2013
Data = read.csv(DataFile, header=TRUE)
cat("Start", date(), "\n")
cat("Data file =", DataFile, "\n")
# Get unique, sorted list of Families
Fam.All <- sort(unique(as.character(Data$Family)))
Families <- Fam.All[Fam.All== "Acanthuridae" | Fam.All == "Achiridae"]
OutFile = "LWR_Test1.csv"
JAGSFILE = "dmnorm_0.bug"
# Get unique, sorted list of body shapes
Bshape <- sort(unique(as.character(Data$BodyShapeI)))
#------------------------------------------
# Functions
#------------------------------------------
#---------------------------------------------------------
# Function to get the priors for the respective body shape
#---------------------------------------------------------
Get.BS.pr <- function(BS) {
### Assignment of priors based on available body shape information
# priors derived from 5150 LWR studies in FishBase 02/2013
if (BS == "eel-like") { # eel-like prior for log(a) and b
prior_mean_log10a = -2.99
prior_sd_log10a = 0.175
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.06
prior_sd_b = 0.0896
prior_tau_b = 1/prior_sd_b^2
} else
if (BS == "elongated") { # elongate prior for log(a) and b
prior_mean_log10a = -2.41
prior_sd_log10a = 0.171
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.12
prior_sd_b = 0.09
prior_tau_b = 1/prior_sd_b^2
} else
if (BS == "fusiform / normal") { # fusiform / normal prior for log(a) and b
prior_mean_log10a = -1.95
prior_sd_log10a = 0.173
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.04
prior_sd_b = 0.0857
prior_tau_b = 1/prior_sd_b^2
} else
if (BS == "short and / or deep") { # short and / or deep prior for log(a) and b
prior_mean_log10a = -1.7
prior_sd_log10a = 0.175
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.01
prior_sd_b = 0.0905
prior_tau_b = 1/prior_sd_b^2
} else
# priors across all shapes, used for missing or other BS
{
prior_mean_log10a = -2.0
prior_sd_log10a = 0.313
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.04
prior_sd_b = 0.119
prior_tau_b = 1/prior_sd_b^2
}
# Priors for measurement error (= sigma) based on 5150 studies
# given here as shape (r) and rate (mu) parameters of a gamma distribution
SD_rObs_log10a = 6520
SD_muObs_log10a = 25076
SD_rObs_b = 6808
SD_muObs_b = 37001
# Priors for between species variability (= sigma) based on 5150 studies for 1821 species
SD_rGS_log10a = 1372
SD_muGS_log10a = 7933
SD_rGS_b = 572
SD_muGS_b = 6498
prior.list <- list(mean_log10a=prior_mean_log10a, sd_log10a=prior_sd_log10a,
tau_log10a=prior_tau_log10a, mean_b=prior_mean_b, sd_b=prior_sd_b,
tau_b=prior_tau_b, SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b, SD_rGS_log10a=SD_rGS_log10a,
SD_muGS_log10a=SD_muGS_log10a, SD_rGS_b=SD_rGS_b, SD_muGS_b=SD_muGS_b)
return(prior.list)
}
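# example (illustrative): pr <- Get.BS.pr("eel-like")
#   returns the eel-like prior defined above, e.g. pr$mean_b = 3.06 and pr$sd_b = 0.0896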
#--------------------------------------------------------------------
# Function to do a Bayesian analysis including LWR from relatives
#--------------------------------------------------------------------
SpecRelLWR <- function(a, b, wts, GenusSpecies, Nspecies, prior_mean_b, prior_tau_b,
prior_mean_log10a, prior_tau_log10a, SD_rObs_log10a, SD_muObs_log10a,
SD_rObs_b, SD_muObs_b, SD_rGS_log10a, SD_muGS_log10a,
SD_rGS_b, SD_muGS_b){
### Define JAGS model
Model = "
model {
#### Process model -- effects of taxonomy
# given the likelihood distributions and the priors,
# create normal posterior distributions for log10a, b,
# and for the process error (=between species variability sigmaGS)
abTrue[1] ~ dnorm(prior_mean_log10a,prior_tau_log10a)
abTrue[2] ~ dnorm(prior_mean_b,prior_tau_b)
sigmaGSlog10a ~ dgamma( SD_rGS_log10a, SD_muGS_log10a)
sigmaGSb ~ dgamma( SD_rGS_b, SD_muGS_b)
# given the posterior distributions and the process errors,
# establish for every species the expected within-species
# parameter distributions; no correlation roGS between species
roGS <- 0
tauGenusSpecies[1] <- pow(sigmaGSlog10a,-2)
tauGenusSpecies[2] <- pow(sigmaGSb,-2)
for(k in 1:Nspecies){
abGenusSpecies[k,1] ~ dnorm(abTrue[1],tauGenusSpecies[1])
abGenusSpecies[k,2] ~ dnorm(abTrue[2],tauGenusSpecies[2])
}
### Observation model
## Errors
# given the data and the priors, establish distributions
# for the observation errors sigmaObs
sigmaObslog10a ~ dgamma( SD_rObs_log10a, SD_muObs_log10a)
sigmaObsb ~ dgamma( SD_rObs_b, SD_muObs_b)
# create inverse covariance matrix, with negative parameter correlation roObs
roObs ~ dunif(-0.99,0)
CovObs[1,1] <- pow(sigmaObslog10a,2)
CovObs[2,2] <- pow(sigmaObsb,2)
CovObs[1,2] <- roObs * sigmaObslog10a * sigmaObsb
CovObs[2,1] <- CovObs[1,2]
TauObs[1:2,1:2] <- inverse(CovObs[1:2,1:2])
## likelihood
# given the data, the priors and the covariance,
# create multivariate likelihood distributions for log10(a) and b
for(i in 1:N){
TauObsI[i,1:2,1:2] <- TauObs[1:2,1:2] * pow(Weights[i],2) # weighted precision
ab[i,1:2] ~ dmnorm(abGenusSpecies[GenusSpecies[i],1:2],TauObsI[i,1:2,1:2])
}
}
"
# Write JAGS model
cat(Model, file=JAGSFILE)
# JAGS settings
Nchains = 3 # number of MCMC chains to be used in JAGS
Nburnin = 1e4 # number of burn-in iterations, to be discarded; 1e4 = 10000 iterations for burn-in
Niter = 3e4 # number of iterations after burn-in; 3e4 = 30000 iterations
Nthin = 1e1 # subset of iterations to be used for analysis; 1e1 = every 10th iteration
# Run JAGS: define data to be passed on in DataJags;
# determine parameters to be returned in Param2Save;
# call JAGS with function Jags()
DataJags = list(ab=cbind(log10(a),b), N=length(a), Weights=wts, Nspecies=Nspecies, GenusSpecies=GenusSpecies,
prior_mean_b=prior_mean_b, prior_tau_b=prior_tau_b,
prior_mean_log10a=prior_mean_log10a, prior_tau_log10a=prior_tau_log10a,
SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b,
SD_rGS_log10a=SD_rGS_log10a, SD_muGS_log10a=SD_muGS_log10a,
SD_rGS_b=SD_rGS_b, SD_muGS_b=SD_muGS_b)
Params2Save = c("abTrue","abGenusSpecies","sigmaGSlog10a","sigmaGSb","sigmaObslog10a","sigmaObsb","roObs")
Jags <- jags(inits=NULL, model.file=JAGSFILE, working.directory=NULL, data=DataJags,
parameters.to.save=Params2Save, n.chains=Nchains, n.thin=Nthin, n.iter=Niter, n.burnin=Nburnin)
Jags$BUGSoutput # contains the results from the JAGS run
# Analyze output for the relatives
abTrue <- Jags$BUGSoutput$sims.list$abTrue
R_mean_log10a <- mean(abTrue[,1]) # true mean of log10(a)
R_sd_log10a <- sd(abTrue[,1]) # true SE of log10(a)
R_mean_b <- mean(abTrue[,2]) # true mean of b
R_sd_b <- sd(abTrue[,2]) # true SE of b
# Analyze output for the target species
abGenusSpecies <- Jags$BUGSoutput$sims.list$abGenusSpecies
mean_log10a <- mean(abGenusSpecies[,1,1]) # true mean of log10(a) for the first species= target species
sd_log10a <- sd(abGenusSpecies[,1,1]) # true SE of log10(a)
mean_b <- mean(abGenusSpecies[,1,2]) # true mean of b
sd_b <- sd(abGenusSpecies[,1,2]) # true SE of b
mean_sigma_log10a <- mean(Jags$BUGSoutput$sims.list$sigmaObslog10a) # measurement error of log10(a)
sd_sigma_log10a <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigmaObslog10a), 2, sd)
mean_sigma_b <- mean(Jags$BUGSoutput$sims.list$sigmaObsb) # measurement error of b
sd_sigma_b <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigmaObsb), 2, sd)
ro_ab <- mean(Jags$BUGSoutput$sims.list$roObs) # measurement correlation of log10(a),b
out.list <- list(N=length(a), mean_log10a=mean_log10a, sd_log10a=sd_log10a, mean_b=mean_b, sd_b=sd_b,
R_mean_log10a=R_mean_log10a, R_sd_log10a=R_sd_log10a, R_mean_b=R_mean_b, R_sd_b=R_sd_b)
return(out.list)
}
#-----------------------------------------------------------------------------
# Function to do a Bayesian LWR analysis with studies for target species only
#-----------------------------------------------------------------------------
SpecLWR <- function(a, b, wts, prior_mean_b, prior_tau_b,
prior_mean_log10a, prior_tau_log10a, SD_rObs_log10a, SD_muObs_log10a,
SD_rObs_b, SD_muObs_b, SD_rGS_log10a, SD_muGS_log10a,
SD_rGS_b, SD_muGS_b){
# Define JAGS model
Model = "
model {
sigma1 ~ dgamma( SD_rObs_log10a, SD_muObs_log10a) # posterior distribution for measurement error in log10a
sigma2 ~ dgamma( SD_rObs_b, SD_muObs_b) # posterior distribution for measurement error in b
ro ~ dunif(-0.99,0) # uniform prior for negative correlation between log10a and b
abTrue[1] ~ dnorm(prior_mean_log10a,prior_tau_log10a) # normal posterior distribution for log10a
abTrue[2] ~ dnorm(prior_mean_b,prior_tau_b) # normal posterior distribution for b
CovObs[1,1] <- pow(sigma1,2)
CovObs[2,2] <- pow(sigma2,2)
CovObs[1,2] <- ro * sigma1 * sigma2
CovObs[2,1] <- CovObs[1,2]
TauObs[1:2,1:2] <- inverse(CovObs[1:2,1:2]) # create inverse covariance matrix
for(i in 1:N){
TauObsI[i,1:2,1:2] <- TauObs[1:2,1:2] * pow(Weights[i],2) # converts prior SD into prior weighted precision
# given the data, the priors and the covariance, create multivariate normal posteriors for log(a) and b
ab[i,1:2] ~ dmnorm(abTrue[1:2],TauObsI[i,1:2,1:2])
}
}
"
# Write JAGS model
cat(Model, file=JAGSFILE)
# JAGS settings
Nchains = 3 # number of MCMC chains to be used in JAGS
Nburnin = 1e4 # number of burn-in runs, to be discarded; 10000 iterations for burn-in
Niter = 3e4 # number of iterations after burn-in; 3e4 = 30000 iterations
Nthin = 1e1 # subset of iterations to be used for analysis; 1e1 = every 10th iteration
# Run JAGS: define data to be passed on in DataJags; determine parameters to be returned in Param2Save; call JAGS with function Jags()
DataJags = list(ab=cbind(log10(a),b), N=length(a), Weights=wts, prior_mean_b=prior_mean_b,
prior_tau_b=prior_tau_b, prior_mean_log10a=prior_mean_log10a, prior_tau_log10a=prior_tau_log10a,
SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b)
Params2Save = c("abTrue","sigma1","sigma2","ro")
Jags <- jags(inits=NULL, model.file=JAGSFILE, working.directory=NULL, data=DataJags, parameters.to.save=Params2Save, n.chains=Nchains, n.thin=Nthin, n.iter=Niter, n.burnin=Nburnin)
Jags$BUGSoutput # contains the results from the JAGS run
# Analyze output
abTrue <- Jags$BUGSoutput$sims.list$abTrue
mean_log10a <- mean(abTrue[,1]) # true mean of log10(a)
sd_log10a <- sd(abTrue[,1]) # true SE of log10(a)
mean_b <- mean(abTrue[,2]) # true mean of b
sd_b <- sd(abTrue[,2]) # true SE of b
mean_sigma_log10a <- mean(Jags$BUGSoutput$sims.list$sigma1) # measurement error of log10(a)
sd_sigma_log10a <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigma1), 2, sd)
mean_sigma_b <- mean(Jags$BUGSoutput$sims.list$sigma2) # measurement error of b
sd_sigma_b <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigma2), 2, sd)
ro_ab <- mean(Jags$BUGSoutput$sims.list$ro) # measurement correlation of log10(a),b
out.list <- list(N=length(a), mean_log10a=mean_log10a, sd_log10a=sd_log10a, mean_b=mean_b, sd_b=sd_b)
return(out.list)
} # End of Functions section
#--------------------------------
# Analysis by Family
#--------------------------------
# Do LWR analysis by Family, Subfamily and Body shape, depending on available LWR studies
# for(Fam in "Acanthuridae") {
for(Fam in Families) {
Subfamilies <- sort(unique(Data$Subfamily[Data$Family==Fam]))
for(SF in Subfamilies) {
for(BS in Bshape) {
# get species (SpecCodes) in this Subfamily and with this body shape
SpecCode.SF.BS <- unique(Data$SpecCode[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS])
# if there are species with this body shape
if(length(SpecCode.SF.BS) > 0) {
# get priors for this body shape
prior <- Get.BS.pr(BS)
# get LWR data for this body shape
b_raw <- Data$b[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS]
cat("\n")
cat("Family =", Fam, ", Subfamily =", SF, ", Body shape =", BS, ", Species =", length(SpecCode.SF.BS), ", LWR =",
length(b_raw[is.na(b_raw)==F]), "\n")
# if no LWR studies exist for this body shape, assign the respective priors to all species
if(length(b_raw[is.na(b_raw)==F])==0) {
# assign priors to species with no LWR in this Subfamily with this body shape
cat("Assigning overall body shape prior to", length(SpecCode.SF.BS), " species \n")
for(SpC in SpecCode.SF.BS) {
out.prior <- data.frame(Fam, SF, BS, SpC, 0, prior$mean_log10a, prior$sd_log10a, prior$mean_b, prior$sd_b,
paste("all LWR estimates for this BS"))
write.table(out.prior, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
}
} else {
# Update priors for this body shape using existing LWR studies
# get LWR data for this Subfamily and body shape
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# add a first dummy record with prior LWR and low score = 0.3, as pseudo target species
# Name of dummy target species is Dum1 dum1
TargetSpec = paste("Dum1", "dum1")
wts <- c(0.3, wts)
a <- c(10^(prior$mean_log10a), a)
b <- c(prior$mean_b, b)
GenSpec <- c(TargetSpec, GenSpec)
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for pseudo target species with Subfamily members
# The resulting R_mean_log10a, R_sd_log10a, R_mean_b, R_sd_b will be used for species without LWR
cat("Updating Subfamily-Bodyshape prior using", Nspecies-1, "species with LWR studies \n")
prior.SFam.BS <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies, prior_mean_b=prior$mean_b,
prior_tau_b=prior$tau_b, prior_mean_log10a=prior$mean_log10a,
prior_tau_log10a=prior$tau_log10a, SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
#------------------------------------------------------------------------------------------
# if there are Genera with >= 5 species with LWR, update body shape priors for these Genera
#------------------------------------------------------------------------------------------
Genera <- unique(as.character(Data$Genus[Keep]))
# create empty list of lists for storage of generic priors
prior.Gen.BS <- rep(list(list()),length(Genera)) # create a list of empty lists
names(prior.Gen.BS) <- Genera # name the list elements according to the Genera
for(Genus in Genera){
# check if Genus contains >= 5 species with LWR data
if(length(unique(Data$SpecCode[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F &
Data$Score>0 & Data$Genus==Genus]))>=5) {
# run Subfamily analysis with only data for this genus
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0 &
Data$Genus==Genus)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# add a first dummy record with prior LWR and low score = 0.3, as pseudo target species
# Name of dummy target species is Dum1 dum1
TargetSpec = paste("Dum1", "dum1")
wts <- c(0.3, wts)
a <- c(10^(prior$mean_log10a), a)
b <- c(prior$mean_b, b)
GenSpec <- c(TargetSpec, GenSpec)
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for pseudo target species with Genus members
# R_mean_log10a, R_sd_log10a, R_mean_b, R_sd_b will be used for species without LWR
cat("Updating prior for Genus =", Genus, ", with", Nspecies -1, "LWR Species \n")
prior.Gen.BS[[Genus]] <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
prior_mean_b=prior.SFam.BS$R_mean_b,
prior_tau_b=1/prior.SFam.BS$R_sd_b^2,
prior_mean_log10a=prior.SFam.BS$R_mean_log10a,
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2,
SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
}
}
# new Subfamily-BS priors have been generated
# for some genera, new Genus-BS priors have been generated
# ---------------------------------------------------------------------
# Loop through all species in this Subfamily-BS; assign LWR as appropriate
# ---------------------------------------------------------------------
for(SpC in SpecCode.SF.BS) {
Genus <- as.character(unique(Data$Genus[Data$SpecCode==SpC]))
Species <- as.character(unique(Data$Species[Data$SpecCode==SpC]))
TargetSpec = paste(Genus, Species)
LWR <- length(Data$b[Data$SpecCode==SpC & is.na(Data$b)==F & Data$Score>0])
LWRGenspec <- length(unique(Data$SpecCode[Data$BodyShapeI==BS & is.na(Data$b)==F &
Data$Score>0 & Data$Genus==Genus]))
LWRSFamspec <- length(unique(Data$SpecCode[Data$BodyShapeI==BS & is.na(Data$b)==F &
Data$Score>0 & Data$Family==Fam & Data$Subfamily==SF]))
#---------------------------------------------------------
# >= 5 LWR in target species, run single species analysis
if(LWR >= 5) {
# Run analysis with data only for this species
Keep <- which(Data$SpecCode==SpC & is.na(Data$b)==F & Data$Score>0)
wts = Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a = Data$a[Keep]
b = Data$b[Keep]
# determine priors to be used
if(LWRGenspec >= 5) {
prior_mean_b=prior.Gen.BS[[Genus]]$R_mean_b
prior_tau_b=1/prior.Gen.BS[[Genus]]$R_sd_b^2
prior_mean_log10a=prior.Gen.BS[[Genus]]$R_mean_log10a
prior_tau_log10a=1/prior.Gen.BS[[Genus]]$R_sd_log10a^2
} else
if (LWRSFamspec > 0) {
prior_mean_b=prior.SFam.BS$R_mean_b
prior_tau_b=1/prior.SFam.BS$R_sd_b^2
prior_mean_log10a=prior.SFam.BS$R_mean_log10a
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2
} else {
prior_mean_b=prior$mean_b
prior_tau_b=prior$tau_b
prior_mean_log10a=prior$mean_log10a
prior_tau_log10a=prior$tau_log10a
}
cat("Running single species analysis for", TargetSpec, "LWR =", LWR, ", LWR species in Genus=",LWRGenspec,"\n" )
# call function for single species analysis
post <- SpecLWR(a, b, wts, prior_mean_b=prior_mean_b,
prior_tau_b=prior_tau_b, prior_mean_log10a=prior_mean_log10a,
prior_tau_log10a=prior_tau_log10a, SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3), format(post$mean_b, digits=3), format(post$sd_b, digits=3),
paste("LWR estimates for this species"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else
#--------------------------------------------------------
# 1-4 LWR in target species and >= 5 LWR species in Genus
# run hierarchical analysis for genus members, with Subfamily-BS prior
if(LWR >= 1 & LWRGenspec >=5) {
# run Subfamily analysis with only data for this genus
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0 &
Data$Genus==Genus)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for target species with Genus members
cat("Running analysis with congeners for", TargetSpec, ", LWR =", LWR,", LWR species in Genus =", LWRGenspec,"\n")
post <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
prior_mean_b=prior.SFam.BS$R_mean_b,
prior_tau_b=1/prior.SFam.BS$R_sd_b^2,
prior_mean_log10a=prior.SFam.BS$R_mean_log10a,
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2,
SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3), format(post$mean_b, digits=3), format(post$sd_b, digits=3),
paste("LWR estimates for species & Genus-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else
#-------------------------------------------------------
# 1-4 LWR in target species and < 5 LWR species in Genus
# run hierarchical analysis for Subfamily members, with bodyshape prior
if(LWR >= 1 & LWRSFamspec > 1) {
# run Subfamily analysis
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for target species with Subfamily members
cat("Running analysis with Subfamily members for", TargetSpec, ", LWR =", LWR,", LWR species in Subfamily-BS =",
LWRSFamspec, "\n")
post <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
prior_mean_b=prior$mean_b,
prior_tau_b=prior$tau_b,
prior_mean_log10a=prior$mean_log10a,
prior_tau_log10a=prior$tau_log10a,
SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3),
format(post$mean_b, digits=3), format(post$sd_b, digits=3),
paste("LWR estimates for species & Subfamily-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else
#--------------------------------------------------
# assign Genus-BS priors to target species
if(LWRGenspec >= 5) {
cat("Assign Genus-BS prior for", TargetSpec, "\n")
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(prior.Gen.BS[[Genus]]$mean_log10a, digits=3),
format(prior.Gen.BS[[Genus]]$sd_log10a, digits=3),
format(prior.Gen.BS[[Genus]]$mean_b, digits=3), format(prior.Gen.BS[[Genus]]$sd_b, digits=3),
paste("LWR estimates for this Genus-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else {
# -----------------------------------------------
# assign Subfamily-BS priors to target species
cat("Assign Subfamily-BS prior for", TargetSpec,"\n")
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(prior.SFam.BS$mean_log10a, digits=3), format(prior.SFam.BS$sd_log10a, digits=3),
format(prior.SFam.BS$mean_b, digits=3), format(prior.SFam.BS$sd_b, digits=3), paste("LWR estimates for this Subfamily-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
}
} # end of species loop for this Subfamily and body shape
} # end of section dealing with Subfamily - body shapes that contain LWR estimates
} # end of section that deals with Subfamily - body shapes that contain species
} # end of body shape section
} # end of Subfamily section
} # end of Family section
cat("End", date(),"\n")

Binary file not shown.

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<jardesc>
<jar path="EcologicalEngineExecutor/AQUAMAPS_SUITABLE/aquamapsnode.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/aquamapsjarcreator.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java&lt;org.gcube.dataanalysis.executor.generators"/>
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java&lt;org.gcube.dataanalysis.executor.nodes.algorithms"/>
</selectedElements>
</jardesc>

@ -0,0 +1,32 @@
#### Use two appenders, one to log to console, another to log to a file
log4j.rootCategory= R
#### First appender writes to console
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
#log4j.appender.stdout.layout.ConversionPattern=%m%n
#log4j.appender.stdout.File=Analysis.log
#### Second appender writes to a file
log4j.logger.AnalysisLogger=trace,stdout, R
log4j.appender.R=org.apache.log4j.RollingFileAppender
#log4j.appender.R=org.apache.log4j.AsyncAppender
#log4j.appender.R.Threshold=INFO
log4j.appender.R.File=Analysis.log
log4j.appender.R.MaxFileSize=50000KB
log4j.appender.R.MaxBackupIndex=2
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
#log4j.appender.R.layout.ConversionPattern=%m%n
#### Third appender writes to a file
log4j.logger.org.hibernate=H
#log4j.appender.H=org.apache.log4j.RollingFileAppender
log4j.appender.H=org.apache.log4j.AsyncAppender
#log4j.appender.H.File=HibernateLog.log
#log4j.appender.H.MaxFileSize=1024KB
#log4j.appender.H.MaxBackupIndex=2
log4j.appender.H.layout=org.apache.log4j.PatternLayout
log4j.appender.H.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n

@ -0,0 +1,18 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<!-- <property name="dialect">org.hibernatespatial.postgis.PostgisDialect</property>-->
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">10</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

@ -0,0 +1,9 @@
AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable
AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative2050
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable2050
AQUAMAPS_NATIVE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNN
AQUAMAPS_SUITABLE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNSuitable
FEED_FORWARD_A_N_N_DISTRIBUTION=org.gcube.dataanalysis.ecoengine.spatialdistributions.FeedForwardNeuralNetworkDistribution
LWR=org.gcube.dataanalysis.executor.nodes.algorithms.LWR
CMSY=org.gcube.dataanalysis.executor.nodes.algorithms.CMSY

@ -0,0 +1,3 @@
DBSCAN=org.gcube.dataanalysis.ecoengine.clustering.DBScan
KMEANS=org.gcube.dataanalysis.ecoengine.clustering.KMeans
XMEANS=org.gcube.dataanalysis.ecoengine.clustering.XMeansWrapper

@ -0,0 +1,3 @@
DISCREPANCY_ANALYSIS=org.gcube.dataanalysis.ecoengine.evaluation.DiscrepancyAnalysis
QUALITY_ANALYSIS=org.gcube.dataanalysis.ecoengine.evaluation.DistributionQualityAnalysis
HRS=org.gcube.dataanalysis.ecoengine.evaluation.HabitatRepresentativeness

@ -0,0 +1,6 @@
LOCAL_WITH_DATABASE=org.gcube.dataanalysis.ecoengine.processing.LocalSplitGenerator
SIMPLE_LOCAL=org.gcube.dataanalysis.ecoengine.processing.LocalSimpleSplitGenerator
D4SCIENCE=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
#OCCURRENCES_MERGER=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
#OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing

@ -0,0 +1,439 @@
cat("Retrieving Input Parameters\n")
inputFile<-'tacsat.csv'
outputFile<-'tacsat_interpolated.csv'
require(data.table)
print(Sys.time())
memory.size(max = TRUE)
memory.limit(size = 4000)
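# Note: the interpolation settings used further below (interval, margin, res, method, fm, distscale,
# sigline, st, headingAdjustment, fast, npoints, equalDist) are not defined in this file; they are
# expected to be supplied by the calling environment before this script is executed.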
interCubicHermiteSpline <- function(spltx,spltCon,res,params,headingAdjustment){
#Formula of Cubic Hermite Spline
t <- seq(0,1,length.out=res)
F00 <- 2*t^3 -3*t^2 + 1
F10 <- t^3-2*t^2+t
F01 <- -2*t^3+3*t^2
F11 <- t^3-t^2
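# F00, F10, F01, F11 are the cubic Hermite basis functions h00, h10, h01, h11, so each segment is
# interpolated as p(t) = h00(t)*p0 + h10(t)*m0 + h01(t)*p1 + h11(t)*m1, with positions p0, p1 and tangents m0, m1.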
#Making tacsat dataset ready
spltx[spltCon[,1],"SI_HE"][which(is.na(spltx[spltCon[,1],"SI_HE"]))] <- 0
spltx[spltCon[,2],"SI_HE"][which(is.na(spltx[spltCon[,2],"SI_HE"]))] <- 0
#Heading at begin point in degrees
Hx0 <- sin(spltx[spltCon[,1],"SI_HE"]/(180/pi))
Hy0 <- cos(spltx[spltCon[,1],"SI_HE"]/(180/pi))
#Heading at end point in degrees
Hx1 <- sin(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi))
Hy1 <- cos(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi))
#Start and end positions
Mx0 <- spltx[spltCon[,1],"SI_LONG"]
Mx1 <- spltx[spltCon[,2],"SI_LONG"]
My0 <- spltx[spltCon[,1],"SI_LATI"]
My1 <- spltx[spltCon[,2],"SI_LATI"]
#Corrected for longitude-latitude effect
Hx0 <- Hx0 * params$fm * spltx[spltCon[,1],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hx1 <- Hx1 * params$fm * spltx[spltCon[,2],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hy0 <- Hy0 * params$fm * lonLatRatio(spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,1],"SI_LATI"]) * spltx[spltCon[,1],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
Hy1 <- Hy1 * params$fm * lonLatRatio(spltx[spltCon[,2],"SI_LONG"],spltx[spltCon[,2],"SI_LATI"]) * spltx[spltCon[,2],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
#Get the interpolation
fx <- outer(F00,Mx0,"*")+outer(F10,Hx0,"*")+outer(F01,Mx1,"*")+outer(F11,Hx1,"*")
fy <- outer(F00,My0,"*")+outer(F10,Hy0,"*")+outer(F01,My1,"*")+outer(F11,Hy1,"*")
#Create output format
intsx <- lapply(as.list(1:nrow(spltCon)),function(x){
matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2,
dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))})
return(intsx)}
rbindTacsat <- function(set1,set2){
cln1 <- colnames(set1)
cln2 <- colnames(set2)
if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets")
idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE)
idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE)
if(length(idx1)>0){
for(i in idx1) set2 <- cbind(set2,NA)
colnames(set2) <- c(cln2,cln1[idx1])}
if(length(idx2)>0){
for(i in idx2) set1 <- cbind(set1,NA)
colnames(set1) <- c(cln1,cln2[idx2])}
cln1 <- colnames(set1)
cln2 <- colnames(set2)
mtch <- pmatch(cln1,cln2)
if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names")
set3 <- rbind(set1,set2[,cln2[mtch]])
return(set3)}
bearing <- function(lon,lat,lonRef,latRef){
x1 <- lon
y1 <- lat
x2 <- lonRef
y2 <- latRef
y <- sin((x2-x1)*pi/180) * cos(y2*pi/180)
x <- cos(y1*pi/180) * sin(y2*pi/180) - sin(y1*pi/180) * cos(y2*pi/180) * cos((x2-x1)*pi/180)
bearing <- atan2(y,x)*180/pi
bearing <- (bearing + 360)%%360
return(bearing)}
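# Illustrative check: bearing(0,0,0,1) is 0 (due north) and bearing(0,0,1,0) is 90 (due east),
# i.e. the function returns the initial great-circle bearing in degrees in the range 0-360.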
`distance` <-
function(lon,lat,lonRef,latRef){
pd <- pi/180
a1<- sin(((latRef-lat)*pd)/2)
a2<- cos(lat*pd)
a3<- cos(latRef*pd)
a4<- sin(((lonRef-lon)*pd)/2)
a <- a1*a1+a2*a3*a4*a4
c <- 2*atan2(sqrt(a),sqrt(1-a));
return(6371*c)}
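# Illustrative check: distance() is the haversine formula on a sphere of radius 6371 km;
# e.g. distance(0,0,1,0) returns ~111.2 km, one degree of longitude along the equator.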
distanceInterpolation <- function(interpolation){
res <- unlist(lapply(interpolation,function(x){
dims <- dim(x)
res <- distance(x[3:dims[1],1],x[3:dims[1],2],x[2:(dims[1]-1),1],x[2:(dims[1]-1),2])
return(sum(res,na.rm=TRUE))}))
return(res)}
equalDistance <- function(interpolation,res=10){
#Calculate distance of all interpolations at the same time
totDist <- distanceInterpolation(interpolation)
#Get dimensions of interpolations
lngInt <- lapply(interpolation,dim)
#Warn if resolution of equal distance is too high compared to original resolution of interpolation
if(min(unlist(lngInt)[seq(1,length(totDist),2)],na.rm=TRUE) < 9*res) warning("Number of intermediate points in the interpolation might be too small for the equal distance points chosen")
#Get distance steps to get equal distance
eqStep <- totDist/(res-1)
#Get x-y values of all interpolations
intidx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,byrow=TRUE)
#Do the calculation
result <- lapply(interpolation,function(ind){
i <- which(intidx[,1] == ind[1,1] & intidx[,2] == ind[1,2])
idx <- apply(abs(outer(
cumsum(distance(ind[3:lngInt[[i]][1],1],ind[3:lngInt[[i]][1],2],ind[2:(lngInt[[i]][1]-1),1],ind[2:(lngInt[[i]][1]-1),2])),
seq(eqStep[i],totDist[i],eqStep[i]),
"-")),
2,which.min)+1
idx <- c(1,idx)
return(ind[c(1,idx+1),])})
#Return the equal distance interpolated set in the same format as the interpolated dataset (as a list)
return(result)}
interStraightLine <- function(spltx,spltCon,res){
fx <- mapply(seq,spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,2],"SI_LONG"],length.out=res)
fy <- mapply(seq,spltx[spltCon[,1],"SI_LATI"],spltx[spltCon[,2],"SI_LATI"],length.out=res)
#Create output format
intsx <- lapply(as.list(1:nrow(spltCon)),function(x){
matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2,
dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))})
return(intsx)}
interpolation2Tacsat <- function(interpolation,tacsat,npoints=10,equalDist=TRUE){
# This function takes the list of tracks output by interpolateTacsat and converts them back to tacsat format.
# The npoints argument is the optional number of points between each 'real' position.
tacsat <- sortTacsat(tacsat)
if(!"HL_ID" %in% colnames(tacsat)) tacsat$HL_ID <- 1:nrow(tacsat)
if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
if(equalDist){
interpolationEQ <- equalDistance(interpolation,npoints) #Divide points equally along interpolated track (default is 10).
} else {
interpolationEQ <- lapply(interpolation,function(x){idx <- round(seq(2,nrow(x),length.out=npoints)); return(x[c(1,idx),])})
}
res <- lapply(interpolationEQ,function(x){
idx <- unlist(x[1,1:2]@.Data); x <- data.frame(x)
colnames(x) <- c("SI_LONG","SI_LATI")
cls <- which(apply(tacsat[c(idx),],2,function(y){return(length(unique(y)))})==1)
for(i in cls){
x <- cbind(x,rep(tacsat[idx[1],i],nrow(x)));
colnames(x) <- c(colnames(x)[1:(ncol(x)-1)],colnames(tacsat)[i])
}
if(!"VE_COU" %in% colnames(x)) x$VE_COU <- rep(tacsat$VE_COU[idx[1]],nrow(x))
if(!"VE_REF" %in% colnames(x)) x$VE_REF <- rep(tacsat$VE_REF[idx[1]],nrow(x))
if(!"FT_REF" %in% colnames(x)) x$FT_REF <- rep(tacsat$FT_REF[idx[1]],nrow(x))
x$SI_DATIM <- tacsat$SI_DATIM[idx[1]]
x$SI_DATIM[-c(1:2)] <- as.POSIXct(cumsum(rep(difftime(tacsat$SI_DATIM[idx[2]],tacsat$SI_DATIM[idx[1]],units="secs")/(nrow(x)-2),nrow(x)-2))+tacsat$SI_DATIM[idx[1]],tz="GMT",format = "%d/%m/%Y %H:%M")
x$SI_DATE <- format(x$SI_DATIM,format="%d/%m/%Y")
timeNotation <- ifelse(length(unlist(strsplit(tacsat$SI_TIME[1],":")))>2,"secs","mins")
if(timeNotation == "secs") x$SI_TIME <- format(x$SI_DATIM,format="%H:%M:%S")
if(timeNotation == "mins") x$SI_TIME <- format(x$SI_DATIM,format="%H:%M")
x$SI_SP <- mean(c(tacsat$SI_SP[idx[1]],tacsat$SI_SP[idx[2]]),na.rm=TRUE)
x$SI_HE <- NA;
x$SI_HE[-c(1,nrow(x))] <- bearing(x$SI_LONG[2:(nrow(x)-1)],x$SI_LATI[2:(nrow(x)-1)],x$SI_LONG[3:nrow(x)],x$SI_LATI[3:nrow(x)])
x$HL_ID <- tacsat$HL_ID[idx[1]]
return(x[-c(1,2,nrow(x)),])})
#interpolationTot <- do.call(rbind,res)
interpolationTot <- res[[1]][,which(duplicated(colnames(res[[1]]))==FALSE)]
if(length(res)>1){
for(i in 2:length(res)){
if(nrow(res[[i]])>0)
interpolationTot <- rbindTacsat(interpolationTot,res[[i]][,which(duplicated(colnames(res[[i]]))==FALSE)])
}
}
#tacsatInt <- rbind(interpolationTot,tacsat[,colnames(interpolationTot)])
tacsatInt <- rbindTacsat(tacsat,interpolationTot)
tacsatInt <- sortTacsat(tacsatInt)
return(tacsatInt)
}
`sortTacsat` <-
function(dat){
require(doBy)
if(!"SI_DATIM" %in% colnames(dat)) dat$SI_DATIM <- as.POSIXct(paste(dat$SI_DATE, dat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
#Sort the tacsat data first by ship, then by date
if("VE_REF" %in% colnames(dat)) dat <- orderBy(~VE_REF+SI_DATIM,data=dat)
if("OB_REF" %in% colnames(dat)) dat <- orderBy(~OB_REF+SI_DATIM,data=dat)
return(dat)}
`lonLatRatio` <-
function(x1,lat){
#Based on the Haversine formula
#At the position, the y-position remains the same, hence, cos(lat)*cos(lat) instead of cos(lat) * cos(y2)
a <- cos(lat*pi/180)*cos(lat*pi/180)*sin((0.1*pi/180)/2)*sin((0.1*pi/180)/2);
c <- 2*atan2(sqrt(a),sqrt(1-a));
R <- 6371;
dx1 <- R*c
return(c(dx1/11.12))}
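# Illustrative check: lonLatRatio(lon, lat) is approximately cos(lat): ~1 at the equator and ~0.5 at
# 60 degrees, i.e. the factor by which one degree of longitude shrinks relative to one degree of latitude.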
`an` <-
function(x){return(as.numeric(x))}
`findEndTacsat` <-
function(tacsat
,startTacsat #Starting point of VMS
,interval #Specify in minutes, NULL means use all points
,margin #Specify the margin in minutes it might deviate from the interval time, in minutes
){
VMS <- tacsat
if(!"SI_DATIM" %in% colnames(VMS)) VMS$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
startVMS <- startTacsat
clStartVMS <- startVMS #Keep the starting index relative to the full VMS list (before subsetting by vessel)
iShip <- VMS$VE_REF[startVMS]
VMS. <- subset(VMS,VE_REF==iShip)
startVMS <- which(VMS$VE_REF[startVMS] == VMS.$VE_REF & VMS$SI_DATIM[startVMS] == VMS.$SI_DATIM)
if(clStartVMS != dim(VMS)[1]){
if(VMS$VE_REF[clStartVMS] != VMS$VE_REF[clStartVMS+1]){
#End of dataset reached
endDataSet <- 1
endVMS <- NA
} else {
#Calculate the difference in time between the starting VMS point and its succeeding points
diffTime <- difftime(VMS.$SI_DATIM[(startVMS+1):dim(VMS.)[1]],VMS.$SI_DATIM[startVMS],units=c("mins"))
if(length(which(diffTime >= (interval-margin) & diffTime <= (interval+margin)))==0){
warning("No succeeding point found, no interpolation possible")
endVMS <- NA
#Check if end of dataset has been reached
ifelse(all((diffTime < (interval-margin))==TRUE),endDataSet <- 1,endDataSet <- 0)
} else {
res <- which(diffTime >= (interval-margin) & diffTime <= (interval+margin))
if(length(res)>1){
res2 <- which.min(abs(interval-an(diffTime[res])))
endVMS <- startVMS + res[res2]
endDataSet <- 0
} else {
endVMS <- startVMS + res
endDataSet <- 0
}
}
#Build-in check
if(is.na(endVMS)==FALSE){
if(!an(difftime(VMS.$SI_DATIM[endVMS],VMS.$SI_DATIM[startVMS],units=c("mins"))) %in% seq((interval-margin),(interval+margin),1)) stop("found endVMS point not within interval range")
endVMS <- clStartVMS + (endVMS - startVMS)
}
}
} else { endDataSet <- 1; endVMS <- NA}
return(c(endVMS,endDataSet))}
`interpolateTacsat` <-
function(tacsat #VMS datapoints
,interval=120 #Specify in minutes, NULL means use all points
,margin=12 #Specify the margin in minutes that the interval might deviate in a search for the next point
,res=100 #Resolution of interpolation method (default = 100)
,method="cHs" #Specify the method to be used: Straight line (SL) or cubic Hermite spline (cHs)
,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) #Specify the three parameters: fm, distscale, sigline, speedthreshold
,headingAdjustment=0
,fast=FALSE){
if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
#Start interpolating the data
if(!method %in% c("cHs","SL")) stop("method selected that does not exist")
#-------------------------------------------------------------------------------
#Fast method or not
#-------------------------------------------------------------------------------
if(fast){
#Interpolation only by vessel, so split tacsat up
tacsat$ID <- 1:nrow(tacsat)
splitTa <- split(tacsat,tacsat$VE_REF)
spltTaCon <- lapply(splitTa,function(spltx){
#Calculate the time difference between every record
dftimex <- outer(spltx$SI_DATIM,spltx$SI_DATIM,difftime,units="mins")
iStep <- 1
connect <- list()
counter <- 1
#Loop over all possible combinations and store if a connection can be made
while(iStep <= nrow(spltx)){
endp <- which(dftimex[,iStep] >= (interval - margin) & dftimex[,iStep] <= (interval + margin))
if(length(endp)>0){
if(length(endp)>1) endp <- endp[which.min(abs(interval - dftimex[endp,iStep]))][1]
connect[[counter]] <- c(iStep,endp)
counter <- counter + 1
iStep <- endp
} else { iStep <- iStep + 1}
}
#Return matrix of connections
return(do.call(rbind,connect))})
if(method=="cHs") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){
return(interCubicHermiteSpline(spltx=splitTa[[y]],spltCon=spltTaCon[[y]],res,params,headingAdjustment))}),recursive=FALSE)
if(method=="SL") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){
return(interStraightLine(splitTa[[y]],spltTaCon[[y]],res))}),recursive=FALSE)
} else {
#Initiate returning result object
returnInterpolations <- list()
#Start iterating over succeeding points
for(iStep in 1:(dim(tacsat)[1]-1)){
if(iStep == 1){
iSuccess <- 0
endDataSet <- 0
startVMS <- 1
ship <- tacsat$VE_REF[startVMS]
} else {
if(is.na(endVMS)==TRUE) endVMS <- startVMS + 1
startVMS <- endVMS
#-Check if the end of the dataset is reached
if(endDataSet == 1 & rev(unique(tacsat$VE_REF))[1] != ship){
startVMS <- which(tacsat$VE_REF == unique(tacsat$VE_REF)[which(unique(tacsat$VE_REF)==ship)+1])[1]
ship <- tacsat$VE_REF[startVMS]
endDataSet<- 0
}
if(endDataSet == 1 & rev(unique(tacsat$VE_REF))[1] == ship) endDataSet <- 2 #Final end of dataset
}
#if end of dataset is not reached, try to find succeeding point
if(endDataSet != 2){
result <- findEndTacsat(tacsat,startVMS,interval,margin)
endVMS <- result[1]
endDataSet <- result[2]
if(is.na(endVMS)==TRUE) int <- 0 #No interpolation possible
if(is.na(endVMS)==FALSE) int <- 1 #Interpolation possible
#Interpolate according to the Cubic Hermite Spline method
if(method == "cHs" & int == 1){
#Define the cHs formula
F00 <- numeric()
F10 <- numeric()
F01 <- numeric()
F11 <- numeric()
i <- 0
t <- seq(0,1,length.out=res)
F00 <- 2*t^3 -3*t^2 + 1
F10 <- t^3-2*t^2+t
F01 <- -2*t^3+3*t^2
F11 <- t^3-t^2
if (is.na(tacsat[startVMS,"SI_HE"])) tacsat[startVMS,"SI_HE"] <- 0
if (is.na(tacsat[endVMS, "SI_HE"])) tacsat[endVMS, "SI_HE"] <- 0
#Heading at begin point in degrees
Hx0 <- sin(tacsat[startVMS,"SI_HE"]/(180/pi))
Hy0 <- cos(tacsat[startVMS,"SI_HE"]/(180/pi))
#Heading at end point in degrees
Hx1 <- sin(tacsat[endVMS-headingAdjustment,"SI_HE"]/(180/pi))
Hy1 <- cos(tacsat[endVMS-headingAdjustment,"SI_HE"]/(180/pi))
Mx0 <- tacsat[startVMS, "SI_LONG"]
Mx1 <- tacsat[endVMS, "SI_LONG"]
My0 <- tacsat[startVMS, "SI_LATI"]
My1 <- tacsat[endVMS, "SI_LATI"]
#Corrected for longitude-latitude effect
Hx0 <- Hx0 * params$fm * tacsat[startVMS,"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hx1 <- Hx1 * params$fm * tacsat[endVMS,"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hy0 <- Hy0 * params$fm * lonLatRatio(tacsat[c(startVMS,endVMS),"SI_LONG"],tacsat[c(startVMS,endVMS),"SI_LATI"])[1] * tacsat[startVMS,"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
Hy1 <- Hy1 * params$fm * lonLatRatio(tacsat[c(startVMS,endVMS),"SI_LONG"],tacsat[c(startVMS,endVMS),"SI_LATI"])[2] * tacsat[endVMS,"SI_SP"]/((params$st[2]-params$st[1]) /2+params$st[1])
#Finalizing the interpolation based on cHs
fx <- numeric()
fy <- numeric()
fx <- F00*Mx0+F10*Hx0+F01*Mx1+F11*Hx1
fy <- F00*My0+F10*Hy0+F01*My1+F11*Hy1
#Add one to list of successful interpolations
iSuccess <- iSuccess + 1
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))
}
#Interpolate according to a straight line
if(method == "SL" & int == 1){
fx <- seq(tacsat$SI_LONG[startVMS],tacsat$SI_LONG[endVMS],length.out=res)
fy <- seq(tacsat$SI_LATI[startVMS],tacsat$SI_LATI[endVMS],length.out=res)
#Add one to list of successful interpolations
iSuccess <- iSuccess + 1
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))
}
}
}
}
return(returnInterpolations)}
cat("Loading Table\n")
tacsatX <-read.table(inputFile,sep=",",header=T)
cat("Adjusting Columns Types\n")
tacsatX<-transform(tacsatX, VE_COU= as.character(VE_COU), VE_REF= as.character(VE_REF), SI_LATI= as.numeric(SI_LATI), SI_LONG= as.numeric(SI_LONG), SI_DATE= as.character(SI_DATE),SI_TIME= as.character(SI_TIME),SI_SP= as.numeric(SI_SP),SI_HE= as.numeric(SI_HE))
tacsatX$SI_DATIM=NULL
cat("Sorting dataset\n")
tacsatS <- sortTacsat(tacsatX)
tacsatCut<-tacsatS
tacsatCut <- tacsatS[1:1000,]
cat("Interpolating\n")
interpolation <- interpolateTacsat(tacsatCut,interval=interval,margin=margin,res=res, method=method,params=list(fm=fm,distscale=distscale,sigline=sigline,st=st),headingAdjustment=headingAdjustment,fast=fast)
cat("Reconstructing Dataset\n")
tacsatInt <- interpolation2Tacsat(interpolation=interpolation,tacsat=tacsatCut,npoints=npoints,equalDist=equalDist)
tacsatInt <- sortTacsat(tacsatInt)
cat("Writing output file\n")
write.csv(tacsatInt, outputFile, row.names=T)
print(Sys.time())
cat("All Done.\n")

@ -0,0 +1 @@
HSPEN_MODELER=org.gcube.dataanalysis.ecoengine.modeling.SimpleModeler

@ -0,0 +1,4 @@
HSPEN=org.gcube.dataanalysis.ecoengine.models.ModelHSPEN
AQUAMAPSNN=org.gcube.dataanalysis.ecoengine.models.ModelAquamapsNN
FEED_FORWARD_ANN=org.gcube.dataanalysis.ecoengine.models.FeedForwardNN
FEED_FORWARD_ANN_FILE=org.gcube.dataanalysis.ecoengine.models.testing.FeedForwardNNFile

@ -0,0 +1,11 @@
AQUAMAPS_SUITABLE=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitableNode
AQUAMAPS_NATIVE=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsNativeNode
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsNative2050Node
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitable2050Node
OCCURRENCES_MERGER=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceMergingNode
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceIntersectionNode
OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceSubtractionNode
LWR=org.gcube.dataanalysis.executor.nodes.algorithms.LWR
BIONYM=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymFlexibleWorkflowTransducer
BIONYM_BIODIV=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymBiodiv
CMSY=org.gcube.dataanalysis.executor.nodes.algorithms.CMSY

File diff suppressed because it is too large

@ -0,0 +1,16 @@
BIOCLIMATE_HSPEC=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPECTransducer
BIOCLIMATE_HCAF=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHCAFTransducer
BIOCLIMATE_HSPEN=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPENTransducer
HCAF_INTERPOLATION=org.gcube.dataanalysis.ecoengine.transducers.InterpolationTransducer
HCAF_FILTER=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.HcafFilter
HSPEN_FILTER=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.HspenFilter
ABSENCE_CELLS_FROM_AQUAMAPS=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.MarineAbsencePointsFromAquamapsDistribution
PRESENCE_CELLS_GENERATION=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.MarinePresencePoints
OCCURRENCES_MERGER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsMerger
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsIntersector
OCCURRENCES_MARINE_TERRESTRIAL=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsInSeaOnEarth
OCCURRENCES_DUPLICATES_DELETER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsDuplicatesDeleter
OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsSubtraction
BIONYM=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymTransducer
BIONYM_LOCAL=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymLocalTransducer
SGVM_INTERPOLATION=org.gcube.dataanalysis.executor.rscripts.SGVMS_Interpolation

@ -0,0 +1,13 @@
ANOMALIES_DETECTION=DBSCAN,KMEANS,XMEANS
CLASSIFICATION=FEED_FORWARD_A_N_N_DISTRIBUTION
CLIMATE=BIOCLIMATE_HSPEC,BIOCLIMATE_HCAF,BIOCLIMATE_HSPEN,HCAF_INTERPOLATION
CORRELATION_ANALYSIS=HRS
DATA_CLUSTERING=DBSCAN,KMEANS,XMEANS
FILTERING=HCAF_FILTER,HSPEN_FILTER
FUNCTION_SIMULATION=FEED_FORWARD_A_N_N_DISTRIBUTION
OCCURRENCES=ABSENCE_CELLS_FROM_AQUAMAPS,PRESENCE_CELLS_GENERATION,OCCURRENCES_MERGER,OCCURRENCES_INTERSECTOR,OCCURRENCES_MARINE_TERRESTRIAL,OCCURRENCES_DUPLICATES_DELETER,OCCURRENCES_SUBTRACTION
PERFORMANCES_EVALUATION=QUALITY_ANALYSIS,DISCREPANCY_ANALYSIS
SPECIES_SIMULATION=AQUAMAPS_SUITABLE,AQUAMAPS_NATIVE,AQUAMAPS_NATIVE_2050,AQUAMAPS_SUITABLE_2050,AQUAMAPS_NATIVE_NEURALNETWORK,AQUAMAPS_SUITABLE_NEURALNETWORK
TRAINING=HSPEN,AQUAMAPSNN,FEED_FORWARD_ANN
TIME_SERIES=HCAF_INTERPOLATION
VESSELS=SGVM_INTERPOLATION

@ -0,0 +1,18 @@
<?xml version="1.0" encoding="WINDOWS-1252" standalone="no"?>
<jardesc>
<jar path="ExecutorScriptFile/org.gcube.dataanalysis.executor.executorscriptplugin.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/ExecutorScriptFile/createscript.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=ExecutorScriptFile/src&lt;org.gcube.dataanalysis.executor.plugin.dummy"/>
<javaElement handleIdentifier="=ExecutorScriptFile/src&lt;org.gcube.dataanalysis.executor.scripts"/>
<javaElement handleIdentifier="=ExecutorScriptFile/src&lt;org.gcube.dataanalysis.executor.plugin"/>
</selectedElements>
</jardesc>

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<jardesc>
<jar path="C:/Users/coro/Desktop/EcologicalEngineExecutor-1.4.0-SNAPSHOT.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/deployDesktop.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java"/>
</selectedElements>
</jardesc>

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<jardesc>
<jar path="EcologicalEngineExecutor/PARALLEL_PROCESSING/EcologicalEngineExecutor-1.2.0-SNAPSHOT.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/deployParallelProcessing.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java"/>
</selectedElements>
</jardesc>

@ -0,0 +1,2 @@
v. 1.0.0 (20-04-2011)
* First release

@ -0,0 +1 @@
Used as a library in the gCube Framework

@ -0,0 +1,8 @@
gCube System - License
------------------------------------------------------------
The gCube/gCore software is licensed as Free Open Source software conveying to
the EUPL (http://ec.europa.eu/idabc/eupl).
The software and documentation is provided by its authors/distributors "as is"
and no expressed or implied warranty is given for its use, quality or fitness
for a particular case.

@ -0,0 +1,2 @@
Gianpaolo Coro (gianpaolo.coro@isti.cnr.it), CNR Pisa,
Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo"

@ -0,0 +1,42 @@
The gCube System - Ecological Engine Library
------------------------------------------------------------
This work is partially funded by the European Commission in the
context of the D4Science project (www.d4science.eu), under the
1st call of FP7 IST priority.
Authors
-------
* Gianpaolo Coro (gianpaolo.coro@isti.cnr.it), CNR Pisa,
Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo"
Version and Release Date
------------------------
version 1.0.0 (22-06-2012)
Description
--------------------
Support library for statistical analysis of Time Series data.
Download information
--------------------
Source code is available from SVN:
http://svn.research-infrastructures.eu/d4science/gcube/trunk/content-management/EcologicalModelling
Binaries can be downloaded from:
http://software.d4science.research-infrastructures.eu/
Documentation
-------------
VREManager documentation is available on-line from the Projects Documentation Wiki:
https://gcube.wiki.gcube-system.org/gcube/index.php/Ecological_Modeling
Licensing
---------
This software is licensed under the terms you may find in the file named "LICENSE" in this directory.

@ -0,0 +1,7 @@
<ReleaseNotes xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="xsd/changelog.xsd">
<Changeset component="org.gcube.data-analysis.ecological-engine-executor.1-0-0"
date="2012-02-23">
<Change>First Release</Change>
</Changeset>
</ReleaseNotes>

@ -0,0 +1,42 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>servicearchive</id>
<formats>
<format>tar.gz</format>
</formats>
<baseDirectory>/</baseDirectory>
<fileSets>
<fileSet>
<directory>${distroDirectory}</directory>
<outputDirectory>/</outputDirectory>
<useDefaultExcludes>true</useDefaultExcludes>
<includes>
<include>README</include>
<include>LICENSE</include>
<include>INSTALL</include>
<include>MAINTAINERS</include>
<include>changelog.xml</include>
</includes>
<fileMode>755</fileMode>
<filtered>true</filtered>
</fileSet>
</fileSets>
<files>
<file>
<source>${distroDirectory}/profile.xml</source>
<outputDirectory>/</outputDirectory>
<filtered>true</filtered>
</file>
<file>
<source>target/${build.finalName}.jar</source>
<outputDirectory>/${artifactId}</outputDirectory>
</file>
<file>
<source>${distroDirectory}/svnpath.txt</source>
<outputDirectory>/${artifactId}</outputDirectory>
<filtered>true</filtered>
</file>
</files>
</assembly>

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ID></ID>
<Type>Library</Type>
<Profile>
<Description>Ecological Engine Executor Library</Description>
<Class>EcologicalEngineExecutor</Class>
<Name>${artifactId}</Name>
<Version>1.0.0</Version>
<Packages>
<Software>
<Name>${artifactId}</Name>
<Version>${version}</Version>
<MavenCoordinates>
<groupId>${groupId}</groupId>
<artifactId>${artifactId}</artifactId>
<version>${version}</version>
</MavenCoordinates>
<Files>
<File>${build.finalName}.jar</File>
</Files>
</Software>
</Packages>
</Profile>
</Resource>

@ -0,0 +1 @@
https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ID/>
<Type>Service</Type>
<Profile>
<Description>A Plugin Executing S.O. Scripts</Description>
<Class>ExecutorPlugins</Class>
<Name>ExecutorScript</Name>
<Version>1.0.0</Version>
<Packages>
<Plugin>
<Name>plugin</Name>
<Version>1.0.0</Version>
<TargetService>
<Service>
<Class>VREManagement</Class>
<Name>Executor</Name>
<Version>1.1.0</Version>
</Service>
<Package>main</Package>
<Version>1.0.0</Version>
</TargetService>
<EntryPoint>org.gcube.dataanalysis.executor.plugin.ScriptPluginContext</EntryPoint>
<Files><File>org.gcube.dataanalysis.executor.executorscriptplugin.jar</File></Files>
</Plugin>
</Packages>
</Profile>
</Resource>

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ID/>
<Type>Service</Type>
<Profile>
<Description>A Plugin Executing S.O. Scripts</Description>
<Class>ExecutorPlugins</Class>
<Name>ExecutorScript</Name>
<Version>1.0.0</Version>
<Packages>
<Plugin>
<Name>plugin</Name>
<Version>1.0.0</Version>
<TargetService>
<Service>
<Class>VREManagement</Class>
<Name>Executor</Name>
<Version>1.1.0</Version>
</Service>
<Package>main</Package>
<Version>1.0.0</Version>
</TargetService>
<EntryPoint>org.gcube.dataanalysis.executor.plugin.ScriptPluginContext</EntryPoint>
<Files><File>org.gcube.dataanalysis.executor.executorscriptplugin.jar</File></Files>
</Plugin>
</Packages>
</Profile>
</Resource>

@ -0,0 +1,138 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>maven-parent</artifactId>
<groupId>org.gcube.tools</groupId>
<version>1.0.0</version>
<relativePath />
</parent>
<groupId>org.gcube.dataanalysis</groupId>
<artifactId>EcologicalEngineExecutor</artifactId>
<version>1.6.4-SNAPSHOT</version>
<name>ecological-engine-executor</name>
<description>ecological-engine-executor</description>
<properties>
<distroDirectory>${project.basedir}/distro</distroDirectory>
</properties>
<dependencies>
<dependency>
<groupId>org.gcube.resourcemanagement</groupId>
<artifactId>executor-service</artifactId>
<version>[1.2.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
</dependency>
<dependency>
<groupId>org.gcube.contentmanagement</groupId>
<artifactId>storage-manager-core</artifactId>
<version>[2.0.2-SNAPSHOT,4.0.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.contentmanagement</groupId>
<artifactId>storage-manager-wrapper</artifactId>
<version>[2.0.2-SNAPSHOT,4.0.0)</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>activemq-core</artifactId>
<version>5.6.0</version>
</dependency>
<dependency>
<groupId>org.gcube.core</groupId>
<artifactId>gcf</artifactId>
<version>[1.4.1,2.0.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.dataanalysis</groupId>
<artifactId>ecological-engine</artifactId>
<version>[1.8.0-SNAPSHOT,2.0.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.informationsystem</groupId>
<artifactId>is-client</artifactId>
<version>[1.5.1,1.6.0]</version>
</dependency>
<dependency>
<groupId>org.gcube.informationsystem</groupId>
<artifactId>is-collector-stubs</artifactId>
<version>[3.0.0-SNAPSHOT, 3.1.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.core</groupId>
<artifactId>common-scope</artifactId>
<version>[1.2.0-SNAPSHOT,3.0.0)</version>
</dependency>
<!-- <dependency>
<groupId>org.apache.jcs</groupId>
<artifactId>jcs</artifactId>
<version>1.3</version>
</dependency>-->
<!-- <dependency> <groupId>org.gcube.dataanalysis</groupId> <artifactId>generic-worker</artifactId>
<version>1.2.0-SNAPSHOT</version> <type>jar</type> <scope>compile</scope>
<exclusions> <exclusion> <artifactId>common-scope</artifactId> <groupId>org.gcube.core</groupId>
</exclusion> </exclusions> </dependency> -->
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.12</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>2.5</version>
<executions>
<execution>
<id>copy-profile</id>
<phase>install</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>target</outputDirectory>
<resources>
<resource>
<directory>${distroDirectory}</directory>
<filtering>true</filtering>
<includes>
<include>profile.xml</include>
</includes>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.2</version>
<configuration>
<descriptors>
<descriptor>${distroDirectory}/descriptor.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<id>servicearchive</id>
<phase>install</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

@ -0,0 +1,148 @@
package org.gcube.dataanalysis.executor.generators;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.interfaces.ActorNode;
import org.gcube.dataanalysis.ecoengine.interfaces.Generator;
import org.gcube.dataanalysis.ecoengine.interfaces.GenericAlgorithm;
import org.gcube.dataanalysis.executor.job.management.DistributedProcessingAgent;
public class D4ScienceDistributedProcessing implements Generator {
public static int maxMessagesAllowedPerJob = 20;
public static boolean forceUpload = true;
public static String defaultContainerFolder = "PARALLEL_PROCESSING";
protected AlgorithmConfiguration config;
protected ActorNode distributedModel;
protected String mainclass;
DistributedProcessingAgent agent;
public D4ScienceDistributedProcessing(){
}
public D4ScienceDistributedProcessing(AlgorithmConfiguration config) {
this.config = config;
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
}
public void compute() throws Exception {
try {
agent.compute();
distributedModel.postProcess(agent.hasResentMessages(),false);
} catch (Exception e) {
distributedModel.postProcess(false,true);
AnalysisLogger.getLogger().error("ERROR: An Error occurred ", e);
throw e;
} finally {
shutdown();
}
}
@Override
public List<StatisticalType> getInputParameters() {
List<StatisticalType> distributionModelParams = new ArrayList<StatisticalType>();
distributionModelParams.add(new ServiceType(ServiceParameters.USERNAME,"ServiceUserName","The final user Name"));
return distributionModelParams;
}
@Override
public String getResources() {
return agent.getResources();
}
@Override
public float getStatus() {
return agent.getStatus();
}
@Override
public StatisticalType getOutput() {
return distributedModel.getOutput();
}
@Override
public ALG_PROPS[] getSupportedAlgorithms() {
ALG_PROPS[] p = { ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
return p;
}
@Override
public INFRASTRUCTURE getInfrastructure() {
return INFRASTRUCTURE.D4SCIENCE;
}
@Override
public void init() throws Exception {
Properties p = AlgorithmConfiguration.getProperties(config.getConfigPath() + AlgorithmConfiguration.nodeAlgorithmsFile);
String model = config.getModel();
String algorithm = null;
if ((model!=null) && (model.length()>0))
algorithm = model;
else
algorithm=config.getAgent();
mainclass = p.getProperty(algorithm);
distributedModel = (ActorNode) Class.forName(mainclass).newInstance();
distributedModel.setup(config);
String scope = config.getGcubeScope();
AnalysisLogger.getLogger().info("Using the following scope for the computation:"+scope);
String owner = config.getParam("ServiceUserName");
int leftNum = distributedModel.getNumberOfLeftElements();
int rightNum = distributedModel.getNumberOfRightElements();
agent = new DistributedProcessingAgent(config, scope, owner, mainclass, config.getPersistencePath(), algorithm, defaultContainerFolder, maxMessagesAllowedPerJob, forceUpload, leftNum, rightNum);
agent.setLogger(AnalysisLogger.getLogger());
}
@Override
public void setConfiguration(AlgorithmConfiguration config) {
this.config = config;
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
}
@Override
public void shutdown() {
try {
agent.shutdown();
} catch (Exception e) {
}
try {
distributedModel.stop();
} catch (Exception e) {
}
}
@Override
public String getLoad() {
return agent.getLoad();
}
@Override
public String getResourceLoad() {
return agent.getResourceLoad();
}
@Override
public GenericAlgorithm getAlgorithm() {
return distributedModel;
}
@Override
public String getDescription() {
return "A D4Science Cloud Processor for Species Distributions";
}
}

@ -0,0 +1,293 @@
package org.gcube.dataanalysis.executor.job.management;
import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.axis.message.addressing.Address;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.apache.log4j.Logger;
import org.gcube.contentmanagement.graphtools.utils.HttpRequest;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.FileTools;
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
import org.gcube.dataanalysis.ecoengine.connectors.livemonitor.ResourceLoad;
import org.gcube.dataanalysis.ecoengine.connectors.livemonitor.Resources;
import org.gcube.dataanalysis.ecoengine.utils.Operations;
import com.thoughtworks.xstream.XStream;
public class DistributedProcessingAgent {
protected QueueJobManager jobManager;
protected boolean deletefiles = true;
protected String mainclass;
public int maxElementsAllowedPerJob = 20;
protected boolean forceUpload = true;
protected boolean stop;
protected String gscope;
protected String userName;
protected String pathToLib;
protected String modelName;
protected String containerFolder;
protected Serializable configurationFile;
protected int rightSetNumberOfElements;
protected int leftSetNumberOfElements;
protected List<String> endpoints;
protected int subdivisiondiv;
protected static String defaultJobOutput = "execution.output";
protected static String defaultScriptFile = "script";
protected Logger logger;
/**
* A distributed processing agent. Performs a distributed computation that maps over the product of two sets A and B.
* The split is over B: A x B1, A x B2, ..., A x Bn.
* It prepares a script to be executed on remote nodes;
* the computation is then sent to remote processors.
*/
public DistributedProcessingAgent(Serializable configurationFile,
String gCubeScope,
String computationOwner,
String mainClass,
String pathToLibFolder,
String modelName,
String containerFolder,
int maxElementsPerJob,
boolean forceReUploadofLibs,
int leftSetNumberOfElements,
int rightSetNumberOfElements
) {
this.stop = false;
this.deletefiles = true;
this.gscope=gCubeScope;
this.mainclass=mainClass;
this.maxElementsAllowedPerJob=maxElementsPerJob;
this.forceUpload=forceReUploadofLibs;
this.configurationFile=configurationFile;
this.rightSetNumberOfElements=rightSetNumberOfElements;
this.leftSetNumberOfElements=leftSetNumberOfElements;
this.userName=computationOwner;
this.pathToLib=pathToLibFolder;
this.modelName=modelName;
this.containerFolder=containerFolder;
}
public void setLogger(Logger logger){
this.logger=logger;
}
public void setEndPoints(List<String> endpoints){
this.endpoints=endpoints;
}
public boolean hasResentMessages(){
return jobManager.hasResentMessages();
}
public void compute() throws Exception {
try {
if (logger == null){
logger = AnalysisLogger.getLogger();
}
if (gscope == null)
throw new Exception("Null Scope");
AnalysisLogger.getLogger().debug("SCOPE: "+gscope);
if (endpoints != null) {
List<EndpointReferenceType> eprtList = new ArrayList<EndpointReferenceType>();
for (String ep : endpoints) {
eprtList.add(new EndpointReferenceType(new Address(ep)));
}
jobManager = new QueueJobManager(gscope, endpoints.size(), eprtList);
} else
jobManager = new QueueJobManager(gscope, 1);
int numberOfResources = jobManager.getNumberOfNodes();
// we split along right dimension so if elements are less than nodes, we should reduce the number of nodes
if (numberOfResources > 0) {
// chunkize the number of species in order to lower the computational effort of the workers
subdivisiondiv = rightSetNumberOfElements / (numberOfResources * maxElementsAllowedPerJob);
int rest = rightSetNumberOfElements % (numberOfResources * maxElementsAllowedPerJob);
if (rest > 0)
subdivisiondiv++;
if (subdivisiondiv == 0)
subdivisiondiv = 1;
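// Illustrative numbers: with rightSetNumberOfElements = 250, numberOfResources = 4 and
// maxElementsAllowedPerJob = 20, 250 / 80 = 3 with remainder 10, so subdivisiondiv becomes 4
// and the right set is processed in 4 chunks.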
executeWork(leftSetNumberOfElements, rightSetNumberOfElements, 0, subdivisiondiv, deletefiles, forceUpload);
if (jobManager.wasAborted()) {
logger.debug("Warning: Job was aborted");
// distributionModel.postProcess(false,true);
throw new Exception("Job System Error");
}
else{
//postprocess
// distributionModel.postProcess(jobManager.hasResentMessages(),false);
}
} else {
logger.debug("Warning: No Workers available");
throw new Exception("No Workers available");
}
} catch (Exception e) {
logger.error("ERROR: An Error occurred ", e);
e.printStackTrace();
throw e;
} finally {
shutdown();
}
}
private void executeWork(int leftNum, int rightNum, int offset, int numberOfResources, boolean deletefiles, boolean forceUpload) throws Exception {
String owner = userName;
int[] chunkSizes = Operations.takeChunks(rightNum, numberOfResources);
List<String> arguments = new ArrayList<String>();
// chunk with respect to the cells: take a chunk of cells vs all species at each node!
for (int i = 0; i < chunkSizes.length; i++) {
String argumentString = "0 " + leftNum + " " + offset + " " + chunkSizes[i] + " ./ "+mainclass;
arguments.add(argumentString);
offset += chunkSizes[i];
logger.debug("Generator-> Argument " + i + ": " + argumentString);
}
if (owner == null)
throw new Exception("Null Owner");
String pathToDir = new File (pathToLib, containerFolder).getAbsolutePath();
if (!(new File(pathToDir).exists()))
throw new Exception("No Implementation of node-model found for algorithm " + pathToDir);
if (mainclass == null)
throw new Exception("No mainClass found for algorithm " + pathToDir);
buildScriptFile(modelName, defaultJobOutput, pathToDir, mainclass);
jobManager.uploadAndExecuteChunkized(AlgorithmConfiguration.StatisticalManagerClass, AlgorithmConfiguration.StatisticalManagerService, owner, pathToDir, "/" + modelName + "/", "./", getScriptName(mainclass), arguments, new XStream().toXML(configurationFile), deletefiles, forceUpload);
}
private String getScriptName(String fullMainClass){
String scriptName = defaultScriptFile+"_"+fullMainClass.substring(fullMainClass.lastIndexOf(".")+1)+".sh";
return scriptName;
}
// builds a job.sh
public void buildScriptFile(String jobName, String jobOutput, String jarsPath, String fullMainClass) throws Exception {
File expectedscript = new File(jarsPath,getScriptName(fullMainClass));
if (!expectedscript.exists()) {
StringBuffer sb = new StringBuffer();
sb.append("#!/bin/sh\n");
sb.append("# " + jobName + "\n");
sb.append("cd $1\n");
sb.append("\n");
sb.append("java -Xmx1024M -classpath ./:");
File jarsPathF = new File(jarsPath);
File[] files = jarsPathF.listFiles();
for (File jar : files) {
if (jar.getName().endsWith(".jar")) {
sb.append("./" + jar.getName());
sb.append(":");
}
}
sb.deleteCharAt(sb.length() - 1);
sb.append(" " + fullMainClass + " $2 " + jobOutput);
sb.append("\n");
AnalysisLogger.getLogger().trace("D4ScienceGenerator->Generating script in " + expectedscript.getAbsolutePath());
FileTools.saveString(expectedscript.getAbsolutePath(), sb.toString(), true, "UTF-8");
} else
AnalysisLogger.getLogger().trace("D4ScienceGenerator->Script " + expectedscript.getAbsolutePath() + " already exists");
}
public String getResources() {
Resources res = new Resources();
try {
int activeNodes = jobManager.getActiveNodes();
for (int i = 0; i < activeNodes; i++) {
try {
res.addResource("Worker_" + (i + 1), 100);
} catch (Exception e1) {
}
}
} catch (Exception e) {
AnalysisLogger.getLogger().debug("D4ScienceGenerator->active nodes not ready");
}
if ((res != null) && (res.list != null))
return HttpRequest.toJSon(res.list).replace("resId", "resID");
else
return "";
}
public float getStatus() {
try {
if (stop)
return 100f;
else
if (jobManager!=null)
return Math.max(0.5f, jobManager.getStatus() * 100f);
else
return 0;
} catch (Exception e) {
return 0f;
}
}
public ALG_PROPS[] getSupportedAlgorithms() {
ALG_PROPS[] p = { ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
return p;
}
public INFRASTRUCTURE getInfrastructure() {
return INFRASTRUCTURE.D4SCIENCE;
}
public void shutdown() {
try {
jobManager.stop();
} catch (Exception e) {
}
stop = true;
}
public String getLoad() {
long tk = System.currentTimeMillis();
ResourceLoad rs = null;
if (jobManager!=null)
rs = new ResourceLoad(tk, jobManager.currentNumberOfStages*subdivisiondiv);
else
rs = new ResourceLoad(tk, 0);
return rs.toString();
}
private long lastTime;
private int lastProcessed;
public String getResourceLoad() {
long thisTime = System.currentTimeMillis();
int processedRecords = 0;
if ((jobManager!=null) && (subdivisiondiv>0))
processedRecords = jobManager.currentNumberOfStages*subdivisiondiv;
int estimatedProcessedRecords = 0;
if (processedRecords == lastProcessed) {
estimatedProcessedRecords = Math.round(((float) thisTime * (float) lastProcessed) / (float) lastTime);
} else {
lastProcessed = processedRecords;
estimatedProcessedRecords = lastProcessed;
}
lastTime = thisTime;
ResourceLoad rs = new ResourceLoad(thisTime, estimatedProcessedRecords);
return rs.toString();
}
}

@ -0,0 +1,821 @@
package org.gcube.dataanalysis.executor.job.management;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;
import javax.jms.ExceptionListener;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageListener;
import org.apache.activemq.ActiveMQConnection;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.gcube.common.core.contexts.GHNContext;
import org.gcube.common.core.informationsystem.client.AtomicCondition;
import org.gcube.common.core.informationsystem.client.ISClient;
import org.gcube.common.core.informationsystem.client.RPDocument;
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.blobstorage.resource.StorageObject;
import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.MemoryType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.ecoengine.utils.Operations;
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
import org.gcube.dataanalysis.executor.messagequeue.Consumer;
import org.gcube.dataanalysis.executor.messagequeue.Producer;
import org.gcube.dataanalysis.executor.messagequeue.QCONSTANTS;
import org.gcube.dataanalysis.executor.messagequeue.QueueManager;
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
import org.gcube.vremanagement.executor.stubs.TaskCall;
import org.gcube.vremanagement.executor.stubs.TaskProxy;
public class QueueJobManager {
// broadcast message period
public static int broadcastTimePeriod = 120000;
// max silence before computation stops
public static int maxSilenceTimeBeforeComputationStop = 10800000;
// max number of retries per computation step
public static int maxNumberOfComputationRetries = 1;
// period for controlling a node activity
public static int computationWatcherTimerPeriod = 120000;
// max number of message to put in a queue
// protected static int maxNumberOfMessages = 20;
public static int maxNumberOfStages = Integer.MAX_VALUE;//10;
// timeout for resending a message
public static int queueWatcherMaxwaitingTime = QCONSTANTS.refreshStatusTime;// * 5;
protected int maxFailureTries;
private static String pluginName = "generic-worker";//"GenericWorker";
protected String scope;
protected GCUBEScope gscope;
protected String session;
protected boolean yetstopped;
protected boolean messagesresent;
protected float status;
protected boolean abort;
protected boolean shutdown;
protected List<EndpointReferenceType> eprs;
protected int activeNodes;
protected int computingNodes;
protected int numberOfMessages;
protected int totalNumberOfMessages;
protected int actualNumberOfNodes;
protected int totalNumberOfStages;
public int currentNumberOfStages;
// files management
protected List<String> filenames;
protected List<String> fileurls;
// queue parameters
protected String queueName;
protected String queueResponse;
protected String queueURL;
protected String queueUSER;
protected String queuePWD;
protected org.gcube.dataanalysis.executor.messagequeue.Consumer consumer;
protected Producer producer;
Timer broadcastTimer;
Timer computationWatcherTimer;
ComputationTimerWatcher computationWatcher;
String serviceClass;
String serviceName;
String owner;
String localDir;
String remoteDir;
String outputDir;
String script;
List<String> arguments;
String configuration;
boolean deletefiles;
StatusListener statuslistener;
private void resetAllVars() {
scope = null;
gscope = null;
yetstopped = false;
messagesresent = false;
status = 0;
abort = false;
shutdown = false;
eprs = null;
activeNodes = 0;
computingNodes = 0;
numberOfMessages = 0;
actualNumberOfNodes = 0;
filenames = null;
fileurls = null;
queueName = null;
queueResponse = null;
queueURL = null;
queueUSER = null;
queuePWD = null;
consumer = null;
producer = null;
broadcastTimer = null;
computationWatcherTimer = null;
computationWatcher = null;
serviceClass = null;
serviceName = null;
owner = null;
localDir = null;
remoteDir = null;
outputDir = null;
script = null;
arguments = null;
configuration = null;
deletefiles = false;
statuslistener = null;
}
public int getActiveNodes() {
return computingNodes;
}
public float getStatus() {
float innerStatus = 0;
if (totalNumberOfMessages != 0)
innerStatus = (1f - ((float) numberOfMessages / (float) totalNumberOfMessages));
if (totalNumberOfStages == 0)
return innerStatus;
else {
float offset = ((float) Math.max(currentNumberOfStages - 1, 0)) / (float) totalNumberOfStages;
float status = offset + (innerStatus / (float) totalNumberOfStages);
// AnalysisLogger.getLogger().info("stages: "+totalNumberOfStages+" inner status: "+innerStatus+" currentStage: "+currentNumberOfStages+" status: "+status);
return status;
}
}
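	// Worked example of the formula above (illustrative values): with
	// totalNumberOfStages = 4, currentNumberOfStages = 2, totalNumberOfMessages = 10
	// and numberOfMessages = 5 still pending, innerStatus = 1 - 5/10 = 0.5, the stage
	// offset is (2 - 1) / 4 = 0.25 and the returned status is 0.25 + 0.5 / 4 = 0.375,
	// i.e. 37.5% of the overall computation.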
// there is only one node from the client point of view
public int getNumberOfNodes() {
if (eprs.size() > 0)
return 1;
else
return 0;
}
public void setNumberOfNodes(int newNumberOfNodes) {
// ignore this setting in this case
}
private void init(String scope, int numberOfNodes) throws Exception {
resetAllVars();
// init scope variables
this.scope = scope;
gscope = GCUBEScope.getScope(scope);
// introduce a session
// initialize flags
shutdown = false;
yetstopped = false;
messagesresent = false;
abort = false;
// find all the nodes - initialize the eprs
findNodes(scope);
}
public QueueJobManager(String scope, int numberOfNodes) throws Exception {
init(scope, numberOfNodes);
}
public QueueJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
init(scope, numberOfNodes);
this.eprs = eprs;
}
private void setGlobalVars(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles) {
this.serviceClass = serviceClass;
this.serviceName = serviceName;
this.owner = owner;
this.localDir = localDir;
this.remoteDir = remoteDir;
this.outputDir = outputDir;
this.script = script;
this.arguments = arguments;
this.configuration = configuration;
this.deletefiles = deletefiles;
}
private int totalmessages = 0;
public boolean uploadAndExecuteChunkized(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles, boolean forceUpload) throws Exception {
long t0 = System.currentTimeMillis();
int elements = arguments.size();
/*
* int div = elements / (maxNumberOfMessages); int rest = elements % (maxNumberOfMessages); if (rest > 0) div++; if (div == 0) { div = 1; }
*/
session = (("" + UUID.randomUUID()).replace("-", "") + Math.random()).replace(".", "");
int[] chunkSizes = null;
		// split into stages only when the number of elements exceeds the maximum number of stages
if (elements>maxNumberOfStages)
chunkSizes = Operations.takeChunks(elements, maxNumberOfStages);
else {
chunkSizes = new int[1];
chunkSizes[0]=elements;
}
int allchunks = chunkSizes.length;
totalNumberOfStages = allchunks;
currentNumberOfStages = 0;
int start = 0;
totalmessages = 0;
AnalysisLogger.getLogger().info("Starting the computation in "+allchunks+" stages");
for (int i = 0; i < allchunks; i++) {
numberOfMessages = totalNumberOfMessages = 0;
currentNumberOfStages++;
int end = Math.min(elements, start + chunkSizes[i]);
AnalysisLogger.getLogger().info("Computing the chunk number " + (i + 1) + " of " + allchunks + " between " + start + " and " + (end - 1));
List<String> sublist = new ArrayList<String>();
for (int j = start; j < end; j++)
sublist.add(arguments.get(j));
AnalysisLogger.getLogger().info("size sub:" + sublist.size());
// totalmessages=totalmessages+sublist.size();
uploadAndExecute(serviceClass, serviceName, owner, localDir, remoteDir, outputDir, script, sublist, configuration, deletefiles, forceUpload);
if (abort)
break;
start = end;
AnalysisLogger.getLogger().info("Processed chunk number " + (i + 1));
}
currentNumberOfStages = totalNumberOfStages;
AnalysisLogger.getLogger().info("Finished computation on all chunks and messages " + totalmessages);
AnalysisLogger.getLogger().info("Whole Procedure done in " + (System.currentTimeMillis() - t0) + " ms");
return (!abort);
}
private boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles, boolean forceUpload) throws Exception {
int numberOfRetries = maxNumberOfComputationRetries;
boolean recompute = true;
while ((numberOfRetries > 0) && (recompute)) {
long t0 = System.currentTimeMillis();
// if (numberOfRetries<maxNumberOfComputationRetries)
init(scope, 1);
AnalysisLogger.getLogger().info("Computation Try number " + (maxNumberOfComputationRetries + 1 - numberOfRetries));
AnalysisLogger.getLogger().info("Contacting " + actualNumberOfNodes + " Nodes");
// set globals
setGlobalVars(serviceClass, serviceName, owner, localDir, remoteDir, outputDir, script, arguments, configuration, deletefiles);
			// if not yet uploaded, upload the required files
uploadFilesOnStorage(forceUpload);
// initializing queue
setQueueVariables();
// broadcast a message to all executors for purging previous queues
// purgeQueues();
createClientProducer();
broadcastListenCommandToExecutorNodes();
			maxFailureTries = activeNodes;
broadcastTimer = new Timer();
broadcastTimer.schedule(new Broadcaster(), broadcastTimePeriod, broadcastTimePeriod);
computationWatcherTimer = new Timer();
computationWatcher = new ComputationTimerWatcher(maxSilenceTimeBeforeComputationStop);
computationWatcherTimer.schedule(computationWatcher, computationWatcherTimerPeriod, computationWatcherTimerPeriod);
// send all messages
sendMessages();
createClientConsumer();
// wait for messages
waitForMessages();
AnalysisLogger.getLogger().info("Wait for message finished - checking result");
if (numberOfMessages == 0) {
AnalysisLogger.getLogger().info("All tasks have correctly finished!");
}
/*
* else{ AnalysisLogger.getLogger().info("Timeout - Warning Some Task is missing!"); for (int k=0;k<finishedChunks.length;k++){ if (finishedChunks[k]==0){ AnalysisLogger.getLogger().info("Sending Again message number " + k); Map<String, Object> inputs = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(k), k, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles); producer.sendMessage(inputs, 0); AnalysisLogger.getLogger().info("Sent Message " + k); } } waitForMessages(); if (numberOfMessages>0){ abort = true; } }
*/
// deleteRemoteFolder();
// summary
AnalysisLogger.getLogger().info("-SUMMARY-");
for (int i = 0; i < totalNumberOfMessages; i++) {
if (activeMessages[i])
AnalysisLogger.getLogger().info("Error : the Message Number " + i + " Was Never Processed!");
if (resentMessages[i] > 0) {
messagesresent = true;
AnalysisLogger.getLogger().info("Warning : the Message Number " + i + " Was resent " + resentMessages[i] + " Times");
}
}
AnalysisLogger.getLogger().info("-SUMMARY END-");
stop();
AnalysisLogger.getLogger().info("Stopped");
AnalysisLogger.getLogger().info("Single Step Procedure done in " + (System.currentTimeMillis() - t0) + " ms");
activeNodes = 0;
numberOfRetries--;
if (abort) {
recompute = true;
if (numberOfRetries > 0)
Thread.sleep(10000);
} else
recompute = false;
}
return (!abort);
}
public boolean hasResentMessages() {
return messagesresent;
}
public void waitForMessages() throws Exception {
AnalysisLogger.getLogger().info("Waiting...");
while ((numberOfMessages > 0) && (!abort)) {
Thread.sleep(2000);
// long tcurrent = System.currentTimeMillis();
// if ((tcurrent - waitTime) > maxwaitingTime) {
// break;
// }
}
AnalysisLogger.getLogger().info("...Stop - Abort?" + abort);
}
public boolean wasAborted() {
return abort;
}
public void purgeQueues() throws Exception {
AnalysisLogger.getLogger().info("Purging Queue");
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
for (int j = 0; j < actualNumberOfNodes; j++) {
try {
contactNodes(tasksProxies, j, queueName, queueUSER, queuePWD, queueURL, queueResponse, session, "true");
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Error in purgin queue on node " + j);
}
}
AnalysisLogger.getLogger().info("Queue Purged");
}
public void stop() {
try {
if (!yetstopped) {
if (broadcastTimer != null) {
AnalysisLogger.getLogger().info("Stopping Broadcaster");
broadcastTimer.cancel();
broadcastTimer.purge();
}
if (computationWatcherTimer != null) {
AnalysisLogger.getLogger().info("Stopping Watcher");
computationWatcherTimer.cancel();
computationWatcherTimer.purge();
}
AnalysisLogger.getLogger().info("Purging Status Listener");
if (statuslistener != null)
statuslistener.destroyAllWatchers();
AnalysisLogger.getLogger().info("Stopping Producer and Consumer");
try{
producer.stop();
producer.closeSession();
}catch(Exception e1){}
try{
consumer.stop();
consumer.closeSession();
}catch(Exception e2){}
AnalysisLogger.getLogger().info("Purging Remote Queues");
purgeQueues();
yetstopped = true;
}
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Not completely stopped");
}
}
private void contactNodes(List<WorkerWatcher> tasksProxies, int order, String queueName, String queueUSER, String queuePWD, String queueURL, String queueResponse, String session, String purgeQueue) throws Exception {
// generate the input map according to the arguments
Map<String, Object> inputs = generateWorkerInput(queueName, queueUSER, queuePWD, queueURL, queueResponse, session, purgeQueue);
AnalysisLogger.getLogger().info("Inputs " + inputs);
// take the i-th endpoint of the executor
EndpointReferenceType selectedEPR = eprs.get(order);
AnalysisLogger.getLogger().info("Broadcasting to node " + (order + 1) + " on " + selectedEPR.getAddress());
// run the executor script
ExecutorCall call = new ExecutorCall(pluginName, gscope);
call.setEndpointReference(selectedEPR);
TaskCall task = null;
AnalysisLogger.getLogger().info("EPR:" + selectedEPR);
task = call.launch(inputs);
// AnalysisLogger.getLogger().info("Task EPR:" + task.getEndpointReference());
TaskProxy proxy = task.getProxy();
tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));
// AnalysisLogger.getLogger().info("Contacting node " + (order + 1) + " OK on " + selectedEPR);
}
private int findNodes(String scopeString) throws Exception {
AnalysisLogger.getLogger().debug("SCOPE:"+scopeString);
GCUBEScope scope = GCUBEScope.getScope(scopeString);
ISClient client = GHNContext.getImplementation(ISClient.class);
WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='" + pluginName + "']", pluginName));
List<RPDocument> listdoc = client.execute(wsquery, scope);
EndpointReferenceType epr = null;
eprs = new ArrayList<EndpointReferenceType>();
int numberOfEP = 0;
for (RPDocument resource : listdoc) {
epr = resource.getEndpoint();
numberOfEP++;
eprs.add(epr);
}
AnalysisLogger.getLogger().info("Found " + numberOfEP + " endpoints");
// get current number of available nodes
actualNumberOfNodes = eprs.size();
return numberOfEP;
}
private void setQueueVariables() throws Exception {
queueName = "D4ScienceJob"; // + session;
queueResponse = queueName + "Response"+session;
//general scope
queueURL = gscope.getServiceMap().getEndpoints(GHNContext.MSGBROKER).iterator().next().getAddress().toString();
//tests on ecosystem
//TODO: delete this!
// queueURL = "tcp://ui.grid.research-infrastructures.eu:6166";
// queueURL = "tcp://message-broker.d4science.research-infrastructures.eu:6166";
AnalysisLogger.getLogger().info("Queue for the scope: " + queueURL);
if (queueURL==null){
if (scope.startsWith("/gcube"))
queueURL = "tcp://ui.grid.research-infrastructures.eu:6166";
else
queueURL = "tcp://message-broker.d4science.research-infrastructures.eu:6166";
}
queueUSER = ActiveMQConnection.DEFAULT_USER;
queuePWD = ActiveMQConnection.DEFAULT_PASSWORD;
}
public void deleteRemoteFolder() throws Exception {
ScopeProvider.instance.set(scope);
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED,MemoryType.VOLATILE).getClient();
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
AnalysisLogger.getLogger().info("Removing Remote Dir " + remoteDir);
client.removeDir().RDir(remoteDir);
AnalysisLogger.getLogger().info("Removed");
}
private void uploadFilesOnStorage(boolean forceupload) throws Exception {
ScopeProvider.instance.set(scope);
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, MemoryType.VOLATILE).getClient();
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
File dir = new File(localDir);
File[] files = dir.listFiles();
AnalysisLogger.getLogger().info("Start uploading");
filenames = new ArrayList<String>();
fileurls = new ArrayList<String>();
boolean uploadFiles = forceupload;
		// if upload is not forced, check whether the remote folder is already populated
if (!uploadFiles) {
List<StorageObject> remoteObjects = client.showDir().RDir(remoteDir);
// only upload files if they are not yet uploaded
if (remoteObjects.size() == 0)
uploadFiles = true;
}
if (!uploadFiles)
AnalysisLogger.getLogger().info("Unnecessary to Uploading Files");
AnalysisLogger.getLogger().info("Loading files");
for (File sfile : files) {
if (sfile.getName().startsWith("."))
continue;
String localf = sfile.getAbsolutePath();
String filename = sfile.getName();
String remotef = remoteDir + sfile.getName();
if (uploadFiles) {
client.put(true).LFile(localf).RFile(remotef);
AnalysisLogger.getLogger().info("Uploading File "+localf+" as remote file "+remotef);
}
String url = client.getUrl().RFile(remotef);
// AnalysisLogger.getLogger().info("URL obtained: " + url);
filenames.add(filename);
fileurls.add(url);
}
AnalysisLogger.getLogger().info("Loading finished");
}
private void broadcastListenCommandToExecutorNodes() throws Exception {
AnalysisLogger.getLogger().info("Submitting script to Remote Queue " + queueName);
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
try{
findNodes(scope);
}catch(Exception e){
AnalysisLogger.getLogger().info("Error in Finding nodes - using previous value");
}
activeNodes = actualNumberOfNodes;
// launch the tasks
for (int i = 0; i < actualNumberOfNodes; i++) {
try {
contactNodes(tasksProxies, i, queueName, queueUSER, queuePWD, queueURL, queueResponse, session, "false");
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Error in Contacting nodes");
}
}
}
private void createClientProducer() throws Exception {
AnalysisLogger.getLogger().info("Creating Message Queue and Producer");
// create the Producer
QueueManager qm = new QueueManager();
qm.createAndConnect(queueUSER, queuePWD, queueURL, queueName);
producer = new Producer(qm, queueName);
AnalysisLogger.getLogger().info("Producer OK");
}
private void createClientConsumer() throws Exception {
AnalysisLogger.getLogger().info("Creating Response Message Queue and Consumer");
// create the listener
statuslistener = new StatusListener();
QueueManager qm1 = new QueueManager();
qm1.createAndConnect(queueUSER, queuePWD, queueURL, queueResponse);
consumer = new Consumer(qm1, statuslistener, statuslistener, queueResponse);
AnalysisLogger.getLogger().info("Consumers OK");
}
boolean activeMessages[];
public int resentMessages[];
private void sendMessages() throws Exception {
int i = 0;
numberOfMessages = arguments.size();
totalNumberOfMessages = numberOfMessages;
AnalysisLogger.getLogger().info("Messages To Send " + numberOfMessages);
activeMessages = new boolean[numberOfMessages];
resentMessages = new int[numberOfMessages];
for (String argum : arguments) {
Map<String, Object> inputs = generateInputMessage(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, false);
producer.sendMessage(inputs, 0);
AnalysisLogger.getLogger().info("Send " + i);
activeMessages[i] = true;
i++;
}
AnalysisLogger.getLogger().info("Messages Sent " + numberOfMessages);
}
private Map<String, Object> generateInputMessage(Object filenames, Object fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir, String session, String configuration, boolean deletefiles, boolean duplicateMessage) {
Map<String, Object> inputs = new HashMap<String, Object>();
inputs.put(ATTRIBUTE.FILE_NAMES.name(), filenames);
inputs.put(ATTRIBUTE.FILE_URLS.name(), fileurls);
inputs.put(ATTRIBUTE.OUTPUTDIR.name(), outputDir);
inputs.put(ATTRIBUTE.SCRIPT.name(), script);
inputs.put(ATTRIBUTE.ARGUMENTS.name(), argum + " " + duplicateMessage);
inputs.put(ATTRIBUTE.ORDER.name(), "" + i);
inputs.put(ATTRIBUTE.SCOPE.name(), scope);
inputs.put(ATTRIBUTE.SERVICE_CLASS.name(), serviceClass);
inputs.put(ATTRIBUTE.SERVICE_NAME.name(), serviceName);
inputs.put(ATTRIBUTE.OWNER.name(), owner);
inputs.put(ATTRIBUTE.REMOTEDIR.name(), remoteDir);
inputs.put(ATTRIBUTE.CLEAN_CACHE.name(), "" + deletefiles);
inputs.put(ATTRIBUTE.QSESSION.name(), session);
inputs.put(ATTRIBUTE.CONFIGURATION.name(), configuration);
inputs.put(ATTRIBUTE.TOPIC_RESPONSE_NAME.name(), queueResponse);
inputs.put(ATTRIBUTE.QUEUE_USER.name(), queueUSER);
inputs.put(ATTRIBUTE.QUEUE_PASSWORD.name(), queuePWD);
inputs.put(ATTRIBUTE.QUEUE_URL.name(), queueURL);
return inputs;
}
private Map<String, Object> generateWorkerInput(String queueName, String queueUser, String queuePassword, String queueURL, String queueResponse, String session, String purge) {
Map<String, Object> inputs = new HashMap<String, Object>();
inputs.put(ATTRIBUTE.TOPIC_NAME.name(), ScriptIOWorker.toInputString(queueName));
inputs.put(ATTRIBUTE.QUEUE_USER.name(), ScriptIOWorker.toInputString(queueUser));
inputs.put(ATTRIBUTE.QUEUE_PASSWORD.name(), ScriptIOWorker.toInputString(queuePassword));
inputs.put(ATTRIBUTE.QUEUE_URL.name(), ScriptIOWorker.toInputString(queueURL));
inputs.put(ATTRIBUTE.TOPIC_RESPONSE_NAME.name(), ScriptIOWorker.toInputString(queueResponse));
inputs.put(ATTRIBUTE.QSESSION.name(), session);
inputs.put(ATTRIBUTE.ERASE.name(), purge);
return inputs;
}
public class Broadcaster extends TimerTask {
@Override
public void run() {
try {
AnalysisLogger.getLogger().info("(((((((((((((((((((((((((((------Broadcasting Information To Watchers------)))))))))))))))))))))))))))");
broadcastListenCommandToExecutorNodes();
AnalysisLogger.getLogger().info("(((((((((((((((((((((((((((------END Broadcasting Information To Watchers------)))))))))))))))))))))))))))");
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("--------------------------------Broadcaster: Error Sending Listen Message to Executors------)))))))))))))))))))))))))))");
}
}
}
public class ComputationTimerWatcher extends TimerTask {
long maxTime;
long lastTimeClock;
public ComputationTimerWatcher(long maxtime) {
this.maxTime = maxtime;
this.lastTimeClock = System.currentTimeMillis();
}
public void reset() {
lastTimeClock = System.currentTimeMillis();
}
public void setmaxTime(long maxTime) {
this.maxTime = maxTime;
}
@Override
public void run() {
try {
long t0 = System.currentTimeMillis();
AnalysisLogger.getLogger().info("Computation Watcher Timing Is " + (t0 - lastTimeClock)+" max computation time is "+maxTime);
if ((t0 - lastTimeClock) > maxTime) {
AnalysisLogger.getLogger().info("Computation Watcher - Computation Timeout: Closing Queue Job Manager!!!");
abort();
}
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Error Taking clock");
}
}
}
public synchronized void abort() {
AnalysisLogger.getLogger().info("Computation Aborted");
this.abort = true;
}
public class StatusListener implements MessageListener, ExceptionListener {
private QueueWorkerWatcher[] watchers;
synchronized public void onException(JMSException ex) {
abort();
AnalysisLogger.getLogger().info("JMS Exception occured. Shutting down client.");
}
private synchronized void addWatcher(int order) {
if (watchers == null)
watchers = new QueueWorkerWatcher[totalNumberOfMessages];
QueueWorkerWatcher watcher = watchers[order];
if (watcher != null) {
destroyWatcher(order);
}
Map<String, Object> message = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(order), order, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, true);
watchers[order] = new QueueWorkerWatcher(producer, message, order);
}
private synchronized void resetWatcher(int order) {
if (watchers == null)
watchers = new QueueWorkerWatcher[totalNumberOfMessages];
else if (watchers[order] != null)
watchers[order].resetTime();
}
private synchronized void destroyWatcher(int order) {
if (watchers != null && watchers[order] != null) {
if (watchers[order].hasResent())
resentMessages[order] = resentMessages[order] + 1;
watchers[order].destroy();
watchers[order] = null;
AnalysisLogger.getLogger().info("Destroyed Watcher number " + order);
}
}
public synchronized void destroyAllWatchers() {
if (watchers != null) {
for (int i = 0; i < watchers.length; i++) {
destroyWatcher(i);
}
}
}
public void onMessage(Message message) {
// get message
try {
				HashMap<String, Object> details = (HashMap<String, Object>) message.getObjectProperty(ATTRIBUTE.CONTENT.name());
String status = (String) details.get(ATTRIBUTE.STATUS.name());
String order = "" + details.get(ATTRIBUTE.ORDER.name());
String nodeaddress = (String) details.get(ATTRIBUTE.NODE.name());
String msession = (String) details.get(ATTRIBUTE.QSESSION.name());
Object error = details.get(ATTRIBUTE.ERROR.name());
AnalysisLogger.getLogger().info("Current session " + session);
if ((msession != null) && (msession.equals(session))) {
AnalysisLogger.getLogger().info("Session " + session + " is right - acknowledge");
message.acknowledge();
AnalysisLogger.getLogger().info("Session " + session + " acknowledged");
int orderInt = -1;
try {
orderInt = Integer.parseInt(order);
} catch (Exception e3) {
e3.printStackTrace();
}
if (orderInt > -1) {
// reset the watcher
if (computationWatcher!=null)
computationWatcher.reset();
AnalysisLogger.getLogger().info("Task number " + order + " is " + status + " on node " + nodeaddress + " and session " + session);
if (status.equals(ATTRIBUTE.STARTED.name())) {
computingNodes++;
addWatcher(orderInt);
}
if (status.equals(ATTRIBUTE.PROCESSING.name())) {
resetWatcher(orderInt);
} else if (status.equals(ATTRIBUTE.FINISHED.name())) {
totalmessages++;
computingNodes--;
destroyWatcher(orderInt);
if (numberOfMessages > 0)
numberOfMessages--;
AnalysisLogger.getLogger().info("Remaining " + numberOfMessages + " messages to manage");
activeMessages[orderInt] = false;
} else if (status.equals(ATTRIBUTE.FATAL_ERROR.name())) {
						if (error != null)
							AnalysisLogger.getLogger().info("REPORTED FATAL_ERROR on " + nodeaddress + " : " + error);
computingNodes--;
if (maxFailureTries <= 0) {
AnalysisLogger.getLogger().info("Too much Failures - Aborting");
destroyAllWatchers();
abort();
} else {
AnalysisLogger.getLogger().info("Failure Occurred - Now Resending Message " + orderInt);
resentMessages[orderInt] = resentMessages[orderInt] + 1;
maxFailureTries--;
// resend message
Map<String, Object> retrymessage = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(orderInt), orderInt, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, true);
producer.sendMessage(retrymessage, QCONSTANTS.timeToLive);
AnalysisLogger.getLogger().info("Failure Occurred - Resent Message " + orderInt);
}
}
} else
AnalysisLogger.getLogger().info("Ignoring message " + order + " with status " + status);
} else {
AnalysisLogger.getLogger().info("wrong session " + msession + " ignoring message");
// consumer.manager.session.recover();
}
} catch (Exception e) {
AnalysisLogger.getLogger().info("Error reading details ", e);
AnalysisLogger.getLogger().info("...Aborting Job...");
abort();
}
}
}
}

@ -0,0 +1,76 @@
package org.gcube.dataanalysis.executor.job.management;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import javax.jms.Message;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
import org.gcube.dataanalysis.executor.messagequeue.Producer;
import org.gcube.dataanalysis.executor.messagequeue.QCONSTANTS;
public class QueueWorkerWatcher {
protected int maxwaitingTime = 2*QueueJobManager.queueWatcherMaxwaitingTime;
private long lastTimeClock;
Timer watcher;
Producer producer;
Map<String, Object> message;
public boolean resent=false;
int order;
public QueueWorkerWatcher(Producer producer, Map<String, Object> message, int order) {
this.producer = producer;
this.message = message;
resent=false;
this.order = order;
watcher = new Timer();
watcher.schedule(new Controller(), 0, QCONSTANTS.refreshStatusTime);
resetTime();
}
public synchronized void resetTime() {
lastTimeClock = System.currentTimeMillis();
}
public synchronized void destroy() {
if (watcher != null) {
watcher.cancel();
watcher.purge();
watcher = null;
}
}
public boolean hasResent(){
return resent;
}
private class Controller extends TimerTask {
@Override
public void run() {
try {
long t0 = System.currentTimeMillis();
AnalysisLogger.getLogger().debug("Watcher "+order+" Timing Is "+(t0 - lastTimeClock)+ " max waiting time: "+maxwaitingTime);
if ((t0 - lastTimeClock) > maxwaitingTime) {
AnalysisLogger.getLogger().info("Watcher "+order+" Time Is Over "+(t0 - lastTimeClock));
AnalysisLogger.getLogger().info("Watcher "+order+" Re-Sending Message "+message);
producer.sendMessage(message, QCONSTANTS.timeToLive);
// QueueJobManager.resentMessages[Integer.parseInt(""+message.get(ATTRIBUTE.ORDER.name()))]=QueueJobManager.resentMessages[Integer.parseInt(""+message.get(ATTRIBUTE.ORDER.name()))]+1;
resent = true;
AnalysisLogger.getLogger().info("Watcher "+order+" Destroying watcher");
destroy();
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
}

@ -0,0 +1,248 @@
package org.gcube.dataanalysis.executor.job.management;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.gcube.common.core.contexts.GHNContext;
import org.gcube.common.core.informationsystem.client.AtomicCondition;
import org.gcube.common.core.informationsystem.client.ISClient;
import org.gcube.common.core.informationsystem.client.RPDocument;
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
import org.gcube.vremanagement.executor.stubs.TaskCall;
import org.gcube.vremanagement.executor.stubs.TaskProxy;
public class RemoteJobManager {
private static String pluginName = "ExecutorScript";
private int actualNumberOfNodes;
private GCUBEScope gscope;
private List<EndpointReferenceType> eprs;
float status;
boolean abort;
boolean shutdown;
protected int activeNodes;
String scope;
public int getActiveNodes() {
return activeNodes;
}
public float getStatus() {
return status;
}
public int getNumberOfNodes() {
return actualNumberOfNodes;
}
public void setNumberOfNodes(int newNumberOfNodes) {
actualNumberOfNodes = newNumberOfNodes;
}
public void init(String scope, int numberOfNodes) throws Exception {
this.scope = scope;
gscope = GCUBEScope.getScope(scope);
AnalysisLogger.getLogger().debug("Using the following scope for this computation:"+gscope);
shutdown = false;
yetuploaded = false;
if (eprs == null)
actualNumberOfNodes = findNodes(scope);
else
actualNumberOfNodes = eprs.size();
if (numberOfNodes < actualNumberOfNodes)
actualNumberOfNodes = numberOfNodes;
}
public RemoteJobManager(String scope, int numberOfNodes) throws Exception {
init(scope, numberOfNodes);
}
public RemoteJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
this.eprs = eprs;
init(scope, numberOfNodes);
}
List<String> filenames;
List<String> fileurls;
boolean yetuploaded;
String session;
public boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, boolean deletefiles) throws Exception {
boolean executeAll = false;
long t0 = System.currentTimeMillis();
		// if not yet uploaded, upload the required files
if (!yetuploaded) {
ScopeProvider.instance.set(scope);
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED).getClient();
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
File dir = new File(localDir);
File[] files = dir.listFiles();
AnalysisLogger.getLogger().debug("Start uploading");
filenames = new ArrayList<String>();
fileurls = new ArrayList<String>();
for (File sfile : files) {
String localf = sfile.getAbsolutePath();
String filename = sfile.getName();
String remotef = remoteDir + sfile.getName();
client.put(true).LFile(localf).RFile(remotef);
String url = client.getUrl().RFile(remotef);
AnalysisLogger.getLogger().debug("URL created: " + url);
filenames.add(filename);
fileurls.add(url);
}
AnalysisLogger.getLogger().debug("Upload end");
yetuploaded = true;
session = (""+UUID.randomUUID()).replace("-", "");
}
//if the number of available nodes is higher than zero launch the tasks
if (actualNumberOfNodes > 0) {
AnalysisLogger.getLogger().debug("Executing script on " + actualNumberOfNodes + " nodes");
int len = arguments.size();
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
activeNodes = 0;
//launch the tasks
for (int i = 0; i < actualNumberOfNodes; i++) {
String argum = "";
//supply the arguments if they are available
if (i < len)
argum = arguments.get(i);
//generate the input map according to the arguments
Map<String, Object> inputs = generateInput(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir,session,deletefiles);
AnalysisLogger.getLogger().debug("-> Owner: " + owner + " ServiceClass: " + serviceClass + " ServiceName:" + serviceName + " remoteDir:" + remoteDir);
//take the i-th endpoint of the executor
EndpointReferenceType selectedEPR = eprs.get(i);
AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " on " + selectedEPR);
//run the executor script
ExecutorCall call = new ExecutorCall(pluginName, gscope);
call.setEndpointReference(selectedEPR);
TaskCall task = null;
task = call.launch(inputs);
TaskProxy proxy = task.getProxy();
tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));
AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " OK on " + selectedEPR);
//add the task to the list in order to reuse it
}
activeNodes = actualNumberOfNodes;
AnalysisLogger.getLogger().debug("Launch Finished - Controlling Status");
int allstatus = 0;
abort = false;
//control the execution: go until there are active nodes or the process must stop
while ((activeNodes != 0) && (!abort) && (!shutdown)) {
//for each node get the task state
int nworkers = tasksProxies.size();
int i=0;
while (i < nworkers) {
WorkerWatcher proxy = tasksProxies.get(i);
String state = proxy.getState();
AnalysisLogger.getLogger().debug("REMOTE JOB MANAGER-> STATE " + state );
//control for aborted computation
abort = ((state == null) || state.equals("FAILED") || (!state.equals("DONE") && !state.equals("RUNNING")));
//control for finished computation
boolean finished = false;
if (state != null)
finished = state.equals("DONE");
//if finished update the active nodes
if (finished) {
tasksProxies.remove(i);
allstatus++;
activeNodes--;
nworkers--;
if (activeNodes == 0)
break;
}
else
i++;
status = Math.min(((float) allstatus / (float) actualNumberOfNodes) * 100f, 95f);
if (abort)
break;
if (shutdown)
break;
// AnalysisLogger.getLogger().debug(String.format("Task " + i + "executed started at %Tc with %s state ", proxy.getStartTime(), state));
//sleep before polling again
Thread.sleep(2000);
}
}
activeNodes = 0;
AnalysisLogger.getLogger().debug("All Tasks have Finished");
if (!abort) {
AnalysisLogger.getLogger().debug("All Task were successful");
/*
* List<StorageObject> listElements = client.showDir().RDir(remoteDir); for (StorageObject obj : listElements) { AnalysisLogger.getLogger().debug("obj stored in directory " + remoteDir + ": " + obj.getName()); }
*/
} else
AnalysisLogger.getLogger().debug("Tasks were NOT successful");
} else
AnalysisLogger.getLogger().debug("Warning: could not execute tasks: No Nodes Available!");
AnalysisLogger.getLogger().debug("Whole procedure done in " + (System.currentTimeMillis() - t0) + " ms");
status = 100f;
return executeAll;
}
public boolean wasAborted() {
return abort;
}
public void stop() {
shutdown = true;
}
private int findNodes(String scopeString) throws Exception {
GCUBEScope scope = GCUBEScope.getScope(scopeString);
ISClient client = GHNContext.getImplementation(ISClient.class);
WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='"+pluginName+"']", pluginName));
List<RPDocument> listdoc = client.execute(wsquery, scope);
EndpointReferenceType epr = null;
eprs = new ArrayList<EndpointReferenceType>();
int numberOfEP = 0;
for (RPDocument resource : listdoc) {
epr = resource.getEndpoint();
numberOfEP++;
eprs.add(epr);
}
AnalysisLogger.getLogger().debug("Found " + numberOfEP + " endpoints");
return numberOfEP;
}
private Map<String, Object> generateInput(Object filenames, Object fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir,String session,boolean deletefiles) {
Map<String, Object> inputs = new HashMap<String, Object>();
inputs.put("FILE_NAMES", filenames);
inputs.put("FILE_URLS", fileurls);
inputs.put("OUTPUTDIR", ScriptIOWorker.toInputString(outputDir));
inputs.put("SCRIPT", ScriptIOWorker.toInputString(script));
inputs.put("ARGUMENTS", ScriptIOWorker.toInputString(argum));
inputs.put("NODE_IDENTIFIER", "" + i);
inputs.put("SCOPE", ScriptIOWorker.toInputString(scope));
inputs.put("SERVICE_CLASS", ScriptIOWorker.toInputString(serviceClass));
inputs.put("SERVICE_NAME", ScriptIOWorker.toInputString(serviceName));
inputs.put("OWNER", ScriptIOWorker.toInputString(owner));
inputs.put("REMOTEDIR", ScriptIOWorker.toInputString(remoteDir));
inputs.put("CLEAN_CACHE",""+deletefiles);
// inputs.put("SESSION", ScriptIO.toInputString(session));
return inputs;
}
}

@ -0,0 +1,37 @@
package org.gcube.dataanalysis.executor.job.management;
import org.apache.log4j.Logger;
import org.gcube.vremanagement.executor.stubs.TaskProxy;
public class WorkerWatcher {
private static int maxTries = 15;
private int currentTries;
private static String runningState = "RUNNING";
private static String failedState = "FAILED";
Logger logger;
TaskProxy proxy;
public WorkerWatcher(TaskProxy proxy, Logger logger){
this.proxy = proxy;
this.logger = logger;
currentTries = 1;
}
public String getState(){
String state ="";
try{
proxy.synchronize();
state = proxy.getState();
return state;
}catch(Exception e){
logger.error("Error in getting state: recover try number "+currentTries,e);
currentTries++;
if (currentTries>maxTries){
return failedState;
}
else return runningState;
}
}
}

@ -0,0 +1,34 @@
package org.gcube.dataanalysis.executor.messagequeue;
public enum ATTRIBUTE {
STATUS,
DONE,
STARTED,
FINISHED,
PROCESSING,
FATAL_ERROR,
TRIVIAL_ERROR,
ORDER,
NODE,
CONTENT,
QSESSION,
TOPIC_NAME,
QUEUE_USER,
QUEUE_PASSWORD,
QUEUE_URL,
TOPIC_RESPONSE_NAME,
ERASE,
FILE_NAMES,
FILE_URLS,
CONFIGURATION,
OUTPUTDIR,
OWNER,
REMOTEDIR,
SERVICE_CLASS,
SERVICE_NAME,
SCOPE,
SCRIPT,
ARGUMENTS,
CLEAN_CACHE,
ERROR
}
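A minimal sketch (not part of the original sources) of the status payload a worker could send back on the response queue: QueueJobManager.StatusListener.onMessage reads these attributes from the CONTENT object property, so the worker side is assumed to publish a map like this via Producer.sendMessage. All concrete values below are invented for illustration.
import java.util.HashMap;
import java.util.Map;
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
public class ResponsePayloadSketch {
	public static Map<String, Object> exampleResponse() {
		Map<String, Object> details = new HashMap<String, Object>();
		details.put(ATTRIBUTE.STATUS.name(), ATTRIBUTE.FINISHED.name()); // STARTED, PROCESSING, FINISHED or FATAL_ERROR
		details.put(ATTRIBUTE.ORDER.name(), "3"); // index of the task message being reported
		details.put(ATTRIBUTE.NODE.name(), "node1.example.org"); // hypothetical worker address
		details.put(ATTRIBUTE.QSESSION.name(), "abcd1234"); // must match the generator session or the message is ignored
		details.put(ATTRIBUTE.ERROR.name(), ""); // error description, meaningful only for FATAL_ERROR
		return details;
	}
}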

@ -0,0 +1,62 @@
package org.gcube.dataanalysis.executor.messagequeue;
import javax.jms.ExceptionListener;
import javax.jms.JMSException;
import javax.jms.MessageConsumer;
import javax.jms.MessageListener;
public class Consumer {
public QueueManager manager;
public MessageConsumer consumer;
private MessageListener consumerCallback;
private ExceptionListener errorCallback;
private String topic;
public Consumer(QueueManager manager, MessageListener consumerCallback, ExceptionListener errorCallback, String topic) throws JMSException, InterruptedException {
this.manager = manager;
this.consumerCallback = consumerCallback;
this.errorCallback = errorCallback;
this.topic = topic;
create();
}
private void create() throws JMSException, InterruptedException {
// Topic ConsumerTopic = manager.session.createTopic(topic);
MessageConsumer consumer = manager.session.createConsumer(manager.destination);
// MessageConsumer consumer = manager.session.createDurableSubscriber(ConsumerTopic, "Consumer."+topic);
// MessageConsumer consumer = manager.session.createConsumer(ConsumerTopic);
manager.connection.setExceptionListener(errorCallback);
consumer.setMessageListener(consumerCallback);
}
public void standBy() throws JMSException {
if (consumer != null)
consumer.close();
}
public void wake() throws Exception {
this.create();
}
public void stop() throws JMSException {
if (consumer != null) {
consumer.close();
}
// closeSession();
}
public void closeSession() throws JMSException {
try {
manager.closeSession();
manager.connection.close();
} catch (Exception e) {
}
}
}

@ -0,0 +1,70 @@
package org.gcube.dataanalysis.executor.messagequeue;
import java.util.UUID;
import javax.jms.DeliveryMode;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageProducer;
import javax.jms.TextMessage;
import javax.jms.Topic;
public class Producer {
public MessageProducer producer;
public QueueManager manager;
public String topic;
public String identifier;
public Producer(QueueManager manager,String topic) throws JMSException {
this.manager = manager;
this.topic = topic;
this.identifier = "" + UUID.randomUUID();
create();
}
private void create() throws JMSException {
// Topic ProducerTopic = manager.session.createTopic(topic);
producer = manager.session.createProducer(manager.destination);
// producer = manager.session.createProducer(ProducerTopic);
producer.setDeliveryMode(DeliveryMode.PERSISTENT);
}
public void sendTextMessage(String text, long timeToLive) throws JMSException {
TextMessage message = manager.session.createTextMessage(text);
producer.setTimeToLive(timeToLive);
producer.send(message);
}
public void sendMessage(Object toSend, long timeToLive) throws JMSException {
Message message = manager.session.createMessage();
message.setObjectProperty(ATTRIBUTE.CONTENT.name(), toSend);
producer.setTimeToLive(timeToLive);
producer.send(message);
}
public void standBy() throws JMSException {
producer.close();
}
public void wake() throws Exception {
this.create();
}
public void stop() throws JMSException {
if (producer != null){
producer.close();
}
// closeSession();
}
public void closeSession() throws JMSException {
try {
manager.closeSession();
manager.connection.close();
} catch (Exception e) {
}
}
}

@ -0,0 +1,8 @@
package org.gcube.dataanalysis.executor.messagequeue;
public class QCONSTANTS {
public static int refreshStatusTime = 60000;
public static int QueueLifeTime = 60000;//3600000;
public static long timeToLive = 0;
}

@ -0,0 +1,77 @@
package org.gcube.dataanalysis.executor.messagequeue;
import java.util.Hashtable;
import java.util.Properties;
import java.util.UUID;
import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.Destination;
import javax.jms.JMSException;
import javax.jms.Session;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;
import javax.naming.Context;
import javax.naming.InitialContext;
import org.apache.activemq.ActiveMQConnectionFactory;
import org.apache.activemq.broker.BrokerService;
import org.apache.activemq.broker.jmx.QueueViewMBean;
public class QueueManager {
public ActiveMQConnectionFactory connectionFactory;
public Connection connection;
public Session session;
public Destination destination;
boolean transacted = false;
public String mqurl;
private String identifier;
public void createAndConnect(String user,String password, String mqurl, String queueName) throws JMSException {
this.mqurl=mqurl;
connect(user,password,mqurl);
session = connection.createSession(transacted, Session.CLIENT_ACKNOWLEDGE);
this.identifier = ""+UUID.randomUUID();
/*
Hashtable properties = new Hashtable();
properties.put(Context.INITIAL_CONTEXT_FACTORY, "org.apache.activemq.jndi.ActiveMQInitialContextFactory");
properties.put(Context.PROVIDER_URL, mqurl);
InitialContext context = new InitialContext(properties);
ConnectionFactory factory = (ConnectionFactory) context.lookup("ConnectionFactory");
destination = (Destination) context.lookup(queueName);
*/
// destination = session.createQueue(queueName+"?consumer.prefetchSize=3");
destination = session.createQueue(queueName+"?wireFormat.maxInactivityDurationInitalDelay=3600000&requestTimeout=240000&wireFormat.maxInactivityDuration=3600000");
}
public void destroy(){
}
private void connect(String user,String password, String mqurl) throws JMSException{
connectionFactory = new ActiveMQConnectionFactory(user, password, mqurl);
connectionFactory.getPrefetchPolicy().setQueuePrefetch(1);
// Properties p = new Properties();
// p.put("persistent", "false");
// p.put("consumer.prefetchSize", "3");
// p.put("ms.prefetchPolicy.all", "3");
// p.put("cms.PrefetchPolicy.queuePrefetch", "3");
// connectionFactory.setProperties(p);
connection = connectionFactory.createConnection();
connection.setClientID(identifier);
connection.start();
}
public void closeSession() throws Exception{
// session.unsubscribe(identifier);
session.close();
}
}
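A rough wiring sketch (not part of the original sources) showing how QueueManager, Producer and Consumer fit together, mirroring createClientProducer() and createClientConsumer() in QueueJobManager. The broker URL is a placeholder, and the real response queue name also carries the session suffix.
import javax.jms.ExceptionListener;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageListener;
import org.apache.activemq.ActiveMQConnection;
import org.gcube.dataanalysis.executor.messagequeue.Consumer;
import org.gcube.dataanalysis.executor.messagequeue.Producer;
import org.gcube.dataanalysis.executor.messagequeue.QueueManager;
public class QueueWiringSketch {
	public static void main(String[] args) throws Exception {
		String url = "tcp://broker.example.org:6166"; // hypothetical broker endpoint
		String user = ActiveMQConnection.DEFAULT_USER;
		String password = ActiveMQConnection.DEFAULT_PASSWORD;
		// request queue: one QueueManager (connection + session) per destination
		QueueManager requestQM = new QueueManager();
		requestQM.createAndConnect(user, password, url, "D4ScienceJob");
		Producer producer = new Producer(requestQM, "D4ScienceJob");
		// response queue: a Consumer with message and error callbacks
		QueueManager responseQM = new QueueManager();
		responseQM.createAndConnect(user, password, url, "D4ScienceJobResponse");
		Consumer consumer = new Consumer(responseQM, new MessageListener() {
			public void onMessage(Message message) {
				System.out.println("response received: " + message);
			}
		}, new ExceptionListener() {
			public void onException(JMSException e) {
				e.printStackTrace();
			}
		}, "D4ScienceJobResponse");
		// producer.sendMessage(inputMap, 0) would publish a task message, then:
		producer.stop();
		producer.closeSession();
		consumer.stop();
		consumer.closeSession();
	}
}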

@ -0,0 +1,19 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
public class AquamapsNative2050Node extends AquamapsNativeNode{
public AquamapsNative2050Node(){
super();
type = "2050";
}
public String getName() {
return "AQUAMAPS_NATIVE_2050";
}
public String getDescription() {
return "Algorithm for Native Range in 2050 by Aquamaps on a single node";
}
}

@ -0,0 +1,70 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
import org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative;
public class AquamapsNativeNode extends AquamapsSuitableNode{
public AquamapsNativeNode(){
super();
}
public String getName() {
return "AQUAMAPS_NATIVE";
}
public String getDescription() {
return "Algorithm for Native Range by Aquamaps on a single node";
}
// writes the distribution model on the DB: input species vector + list of areas vectors to report
public void singleStepPostprocess(Object species) {
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Analyzing Species distribution");
// write info on DB
Queue<String> rows = new ConcurrentLinkedQueue<String>();
String speciesID = AquamapsSuitableFunctions.getMainInfoID(species);
Map<String, Float> csquaresMap = operations.completeDistribution.get(speciesID);
if (csquaresMap != null) {
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Getting csquare probabilites");
// write only processed areas
for (String singleCsquare : csquaresMap.keySet()) {
String additionalInformation = operations.getAdditionalInformation(species, operations.processedAreas.get(singleCsquare));
if (additionalInformation == null)
additionalInformation = "";
else if (additionalInformation.length() > 0)
additionalInformation = "," + additionalInformation.trim();
float prob = 0f;
try {
prob = csquaresMap.get(singleCsquare);
} catch (Exception e) {
System.out.println("Aquamaps Algorithm Single Step PostProcess ->Error in getting probability value at " + speciesID + " , " + singleCsquare);
}
if (prob > 0)
rows.offer("'" + speciesID + "','" + singleCsquare + "','" + MathFunctions.roundDecimal(prob, 3) + "'" + additionalInformation);
}
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Filtering probabilities. Size:"+rows.size());
Queue<String> newrows = new AquamapsNative().filterProbabilitySet(rows);
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Filtered probabilities. Size:"+newrows.size());
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Writing rows on DB");
List<String> toWrite = new ArrayList<String>();
for (String row:newrows){
toWrite.add(row);
// System.out.println("Added row: "+row);
}
AquamapsSuitableFunctions.writeOnDB(toWrite, currentconfig.getParam("DistributionTable"), dbHibConnection);
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Rows written on DB");
}
else
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Probability distribution is void");
}
}

@ -0,0 +1,19 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
public class AquamapsSuitable2050Node extends AquamapsSuitableNode{
public AquamapsSuitable2050Node(){
super();
type = "2050";
}
public String getName() {
return "AQUAMAPS_SUITABLE_2050";
}
public String getDescription() {
return "Algorithm for Suitable Range in 2050 by Aquamaps on a single node";
}
}

@ -0,0 +1,200 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.ObjectInputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsAlgorithmCore;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
import org.hibernate.SessionFactory;
import com.thoughtworks.xstream.XStream;
public class AquamapsSuitableFunctions {
public static String countAllSpeciesQuery = "select count(*) from %1$s;";
// public static String countAll = "select count(*) from %1$s;";
public static String countAll = "EXPLAIN SELECT * FROM %1$s;";
public static String countCsquareCodeQuery = "select count (*) from %1$s d where oceanarea>0";
public static String selectAllSpeciesQuery = "select depthmin,meandepth,depthprefmin,pelagic,depthprefmax,depthmax,tempmin,layer,tempprefmin,tempprefmax,tempmax,salinitymin,salinityprefmin,salinityprefmax,salinitymax,primprodmin,primprodprefmin,primprodprefmax,primprodmax,iceconmin,iceconprefmin,iceconprefmax,iceconmax,landdistyn,landdistmin,landdistprefmin,landdistprefmax,landdistmax,nmostlat,smostlat,wmostlong,emostlong,faoareas,speciesid from %1$s order by speciesid limit %2$s offset %3$s;";
public static String csquareCodeQuery = "select csquarecode,depthmean,depthmax,depthmin, sstanmean,sbtanmean,salinitymean,salinitybmean, primprodmean,iceconann,landdist,oceanarea,centerlat,centerlong,faoaream,eezall,lme from %1$s d where oceanarea>0 order by csquarecode limit %2$s offset %3$s";
public static String createTableStatement = "CREATE TABLE %1$s ( speciesid character varying, csquarecode character varying, probability real, boundboxyn smallint, faoareayn smallint, faoaream integer, eezall character varying, lme integer) WITH (OIDS=FALSE ) #TABLESPACE#; CREATE INDEX CONCURRENTLY %1$s_idx ON %1$s USING btree (speciesid, csquarecode, faoaream, eezall, lme);";
public static String metainfo = "boundboxyn, faoareayn, faoaream, eezall, lme";
public static String selectAllSpeciesObservationQuery = "SELECT speciesid,maxclat,minclat from %1$s;";
public static String probabilityInsertionStatement = "insert into %1$s (speciesid,csquarecode,probability %ADDEDINFORMATION%) VALUES %2$s";
public static String deleteDuplicates = "delete from %1$s where speciesid='%2$s'";
// Default Files
private static String speciesFile = "species.dat";
private static String csquaresFile = "csquares.dat";
private static String maxminlatFile = "maxminlat.dat";
private static String configFile = "config.dat";
// file1
public HashMap<String, List<Object>> allSpeciesObservations;
// file2
public List<Object> speciesVectors;
// file3
public List<Object> environmentVectors;
public int numberOfSpecies;
public int numberOfCells;
//processing variables
public AlgorithmConfiguration currentconfig;
public HashMap<String, String> currentSpeciesBoundingBoxInfo;
public String currentFAOAreas;
public AquamapsAlgorithmCore core;
public String type;
public HashMap<String, Object> processedAreas;
public ConcurrentHashMap<String, Map<String, Float>> completeDistribution;
public AquamapsSuitableFunctions(AquamapsAlgorithmCore core, String type, AlgorithmConfiguration config) {
this.core = core;
this.type = type;
this.currentconfig = config;
}
//PROBABILITY CALCULATION
// calculates probability and takes into account the processes areas by this node
public float calcProb(Object species, Object area) {
float prob = (float) core.getSpeciesProb((Object[]) species, (Object[]) area);
String speciesID = getMainInfoID(species);
String csquareCode = getGeographicalID(area);
if (completeDistribution == null)
completeDistribution = new ConcurrentHashMap<String, Map<String, Float>>();
Map<String, Float> geoDistrib = completeDistribution.get(speciesID);
// if the map is null then generate a new map, otherwise update it
if (geoDistrib == null) {
geoDistrib = new ConcurrentHashMap<String, Float>();
completeDistribution.put(speciesID, geoDistrib);
}
if (prob > 0.1) {
// record the overall probability distribution
geoDistrib.put(csquareCode, prob);
if (processedAreas == null)
processedAreas = new HashMap<String, Object>();
processedAreas.put(csquareCode, area);
}
return prob;
}
//BOUNDING BOX CALCULATION
// calculates the bounding box information
public HashMap<String, Integer> calculateBoundingBox(Object[] csquarecode) {
HashMap<String, Integer> boundingInfo = core.calculateBoundingBox("" + csquarecode[0], currentSpeciesBoundingBoxInfo.get("$pass_NS"), currentSpeciesBoundingBoxInfo.get("$pass_N"), currentSpeciesBoundingBoxInfo.get("$pass_S"), AquamapsAlgorithmCore.getElement(csquarecode, 12),// centerlat
AquamapsAlgorithmCore.getElement(csquarecode, 13),// centerlong
AquamapsAlgorithmCore.getElement(csquarecode, 14),// faoaream
currentSpeciesBoundingBoxInfo.get("$paramData_NMostLat"), currentSpeciesBoundingBoxInfo.get("$paramData_SMostLat"), currentSpeciesBoundingBoxInfo.get("$paramData_WMostLong"), currentSpeciesBoundingBoxInfo.get("$paramData_EMostLong"), currentFAOAreas, currentSpeciesBoundingBoxInfo.get("$northern_hemisphere_adjusted"), currentSpeciesBoundingBoxInfo.get("$southern_hemisphere_adjusted"));
return boundingInfo;
}
// initializes currentFAOAreas and currentSpeciesBoundingBoxInfo
public void getBoundingBoxInformation(Object[] speciesInfoRow, Object[] speciesObservations) {
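// column indices refer to the selectAllSpeciesQuery projection:
// 28=nmostlat, 29=smostlat, 30=wmostlong, 31=emostlong, 32=faoareas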
Object[] row = speciesInfoRow;
String $paramData_NMostLat = AquamapsAlgorithmCore.getElement(row, 28);
String $paramData_SMostLat = AquamapsAlgorithmCore.getElement(row, 29);
String $paramData_WMostLong = AquamapsAlgorithmCore.getElement(row, 30);
String $paramData_EMostLong = AquamapsAlgorithmCore.getElement(row, 31);
currentFAOAreas = AquamapsAlgorithmCore.getElement(row, 32);
// adjust FAO areas
currentFAOAreas = core.procFAO_2050(currentFAOAreas);
// get Bounding Box Information
// System.out.println("TYPE:"+type);
currentSpeciesBoundingBoxInfo = core.getBoundingBoxInfo($paramData_NMostLat, $paramData_SMostLat, $paramData_WMostLong, $paramData_EMostLong, speciesObservations, type);
// end of get BoundingBoxInformation
}
// DATABASE INTERACTION
public static void writeOnDB(List<String> buffer, String destinationTable, SessionFactory dbHibConnection) {
int endIndex = buffer.size();
if (endIndex > 0) {
System.out.println("\tWriting Buffer is not empty: "+endIndex);
String $probabilityInsertionStatement = AquamapsSuitableFunctions.probabilityInsertionStatement.replace("%ADDEDINFORMATION%", ","+metainfo);
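// build a single multi-row insert, VALUES (row1),(row2),..., then bind the table name and values into the template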
StringBuffer sb = new StringBuffer();
// System.out.println("writeOnDB()->PROBABILITIES BUFFER SIZE DELETION");
for (int i = 0; i < endIndex; i++) {
sb.append("(" + buffer.get(i) + ")");
if (i < endIndex - 1) {
sb.append(",");
}
}
String insertionString = String.format($probabilityInsertionStatement, destinationTable, sb.toString());
try {
// System.out.println(insertionString);
DatabaseFactory.executeSQLUpdate(insertionString, dbHibConnection);
} catch (Exception e) {
e.printStackTrace();
}
}
else
System.out.println("\tWarning : writing buffer is empty!");
System.out.println("\tWriting on DB FINISHED");
}
//FILES MANAGEMENT
public void dumpAll(String path) throws Exception {
Transformations.dumpObjectToFile(path + configFile, currentconfig);
// Transformations.dumpObjectToFile(path + csquaresFile, environmentVectors);
}
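// restores an AlgorithmConfiguration previously serialized as XStream XML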
public void rebuildConfig(String configFile) throws Exception{
FileInputStream fis = new FileInputStream(new File(configFile));
currentconfig = (AlgorithmConfiguration) new XStream().fromXML(fis);
fis.close();
}
// once uploaded, the files are available locally on the node
public void rebuildAll(int cellOrdinal, int chunksize, int speciesOrdinal, int speciesChunkSize, String pathToFiles) throws Exception {
// currentconfig = (AlgorithmConfiguration) Transformations.getObjectFromFile(pathToFiles+configFile);
/*
try{
environmentVectors = (List<Object>) Transformations.getObjectFromFile(pathToFiles+csquaresFile);
}catch(Exception e){
System.out.println("\tError in retrieving environmental vectors");
}
*/
}
public String getAdditionalInformation(Object species, Object area) {
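// produces the extra columns listed in 'metainfo': boundboxyn, faoareayn, faoaream, eezall, lme
// (the last three are indices 14-16 of the csquareCodeQuery projection)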
Object[] arearray = (Object[]) area;
HashMap<String, Integer> boundingInfo = calculateBoundingBox(arearray);
String addedInformation = "'" + boundingInfo.get("$InBox") + "','" + boundingInfo.get("$InFAO") + "','" + arearray[14] + "','" + arearray[15] + "','" + arearray[16] + "'";
return addedInformation;
}
//AUXILIARY FUNCTIONS
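// speciesid is column 33 of selectAllSpeciesQuery; csquarecode is column 0 of csquareCodeQuery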
public static String getMainInfoID(Object speciesInfo) {
String s = "" + ((Object[]) speciesInfo)[33];
return s;
}
public static String getGeographicalID(Object geoInfo) {
String s = "" + ((Object[]) geoInfo)[0];
return s;
}
}
