Branching to have a new version depending on the new SmartExecutor

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngineSmartExecutor@112013 82a268e6-3cf1-43bd-a215-b396298e98cf
Luca Frosini 2015-02-13 14:29:19 +00:00
commit 8c8d1c3167
167 changed files with 17214 additions and 0 deletions

.classpath Normal file

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>

.project Normal file

@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>EcologicalEngineExecutor</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>

@@ -0,0 +1,4 @@
#Fri Jun 22 18:05:41 CEST 2012
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding/<project>=UTF-8

@@ -0,0 +1,13 @@
#Fri Jun 22 18:05:41 CEST 2012
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.6
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.6

@@ -0,0 +1,5 @@
#Fri Jun 22 17:51:31 CEST 2012
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

@@ -0,0 +1,17 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">1</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@@ -0,0 +1,5 @@
#!/bin/sh
# AQUAMAPS_SUITABLE
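# Usage (assumed): <this script> <working_dir> <node_argument>
# $1 - directory containing the jars listed on the classpath below; the script cd's into it
# $2 - first argument passed to AquamapsSuitableNode; "execution.output" is passed as the second argument (presumably the output file)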
cd $1
java -Xmx1024M -classpath ./:./aquamapsnode.jar:./c3p0-0.9.1.2.jar:./commons-collections-3.1.jar:./dom4j-1.6.1.jar:./ecologicalDataMining.jar:./hibernate3.jar:./jaxen-1.1.2.jar:./jta-1.1.jar:./log4j-1.2.16.jar:./postgresql-8.4-702.jdbc4.jar:./slf4j-api-1.6.0.jar:./slf4j-log4j12-1.6.0.jar:./xpp3_min-1.1.4c.jar:./xstream-1.3.1.jar org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitableNode $2 execution.output

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@@ -0,0 +1,696 @@
##--------------------------------------------------------
## CMSY analysis with estimation of total biomass, including Bayesian Schaefer
## written by Rainer Froese with support from Gianpaolo Coro in 2013-2014
## This version adjusts biomass to average biomass over the year
## It also contains the FutureCrash option to improve prediction of final biomass
## Version 21 adds the purple point to indicate the 25th percentile of final biomass
## Version 22 accepts that no biomass or CPUE data are available
##--------------------------------------------------------
library(R2jags) # Interface with JAGS
library(coda)
#-----------------------------------------
# Some general settings
#-----------------------------------------
# set.seed(999) # use for comparing results between runs
rm(list=ls(all=TRUE)) # clear previous variables etc
options(digits=3) # displays all numbers with three significant digits as default
graphics.off() # close graphics windows from previous sessions
#-----------------------------------------
# General settings for the analysis
#-----------------------------------------
sigR <- 0.02 # overall process error; 0.05 works reasonable for simulations, 0.02 for real data; 0 if deterministic model
n <- 10000 # initial number of r-k pairs
batch.mode <- T # set to TRUE to suppress graphs
write.output <- T # set to true if table of output is wanted
FutureCrash <- "No"
#-----------------------------------------
# Start output to screen
#-----------------------------------------
cat("-------------------------------------------\n")
cat("Catch-MSY Analysis,", date(),"\n")
cat("-------------------------------------------\n")
#------------------------------------------
# Read data and assign to vectors
#------------------------------------------
# filename_1 <- "AllStocks_Catch4.csv"
# filename_2 <- "AllStocks_ID4.csv"
# filename_1 <- "SimCatch.csv"
# filename_2 <- "SimSpec.csv"
# filename_2 <- "SimSpecWrongS.csv"
# filename_2 <- "SimSpecWrongI.csv"
# filename_2 <- "SimSpecWrongF.csv"
# filename_2 <- "SimSpecWrongH.csv"
# filename_2 <- "SimSpecWrongL.csv"
# filename_1 <- "FishDataLim.csv"
# filename_2 <- "FishDataLimSpec.csv"
filename_1 <- "WKLIFE4Stocks.csv"
filename_2 <- "WKLIFE4ID.csv"
outfile<-"outfile"
outfile.txt <- "outputfile.txt"
cdat <- read.csv(filename_1, header=T, dec=".", stringsAsFactors = FALSE)
cinfo <- read.csv(filename_2, header=T, dec=".", stringsAsFactors = FALSE)
cat("Files", filename_1, ",", filename_2, "read successfully","\n")
# Stocks with total biomass data and catch data from StartYear to EndYear
# stocks <- sort(as.character(cinfo$stock)) # All stocks
stocks<-"HLH_M07"
# select one stock after the other
for(stock in stocks) {
# assign data from cinfo to vectors
res <- as.character(cinfo$Resilience[cinfo$stock==stock])
StartYear <- as.numeric(cinfo$StartYear[cinfo$stock==stock])
EndYear <- as.numeric(cinfo$EndYear[cinfo$stock==stock])
r_low <- as.numeric(cinfo$r_low[cinfo$stock==stock])
r_hi <- as.numeric(cinfo$r_hi[cinfo$stock==stock])
stb_low <- as.numeric(cinfo$stb_low[cinfo$stock==stock])
stb_hi <- as.numeric(cinfo$stb_hi[cinfo$stock==stock])
intyr <- as.numeric(cinfo$intyr[cinfo$stock==stock])
intbio_low <- as.numeric(cinfo$intbio_low[cinfo$stock==stock])
intbio_hi <- as.numeric(cinfo$intbio_hi[cinfo$stock==stock])
endbio_low <- as.numeric(cinfo$endbio_low[cinfo$stock==stock])
endbio_hi <- as.numeric(cinfo$endbio_hi[cinfo$stock==stock])
Btype <- as.character(cinfo$Btype[cinfo$stock==stock])
FutureCrash <- as.character(cinfo$FutureCrash[cinfo$stock==stock])
comment <- as.character(cinfo$comment[cinfo$stock==stock])
# extract data on stock
yr <- as.numeric(cdat$yr[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])
ct <- as.numeric(cdat$ct[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that catch is given in tonnes, transforms to '000 tonnes
if(Btype=="observed" | Btype=="CPUE" | Btype=="simulated") {
bt <- as.numeric(cdat$TB[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that biomass is in tonnes, transforms to '000 tonnes
} else {bt <- NA}
nyr <- length(yr) # number of years in the time series
if(Btype!="observed") {bio <- bt}
# change biomass to moving average as assumed by Schaefer (but not for simulations or CPUE)
# for last year use reported bio
if(Btype=="observed") {
ma <- function(x){filter(x,rep(1/2,2),sides=2)}
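# ma() is a 2-point running mean (stats::filter): each value is averaged with the adjacent year,
# which leaves the final element NA; it is restored from the reported biomass below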
bio <- ma(bt)
bio[length(bio)] <- bt[length(bt)] }
# initialize vectors for viable r, k, bt
rv.all <- vector()
kv.all <- vector()
btv.all <- matrix(data=vector(),ncol=nyr+1)
#----------------------------------------------------
# Determine initial ranges for parameters and biomass
#----------------------------------------------------
# initial range of r from input file
if(is.na(r_low)==F & is.na(r_hi)==F) {
start_r <- c(r_low,r_hi)
} else {
# initial range of r and CatchMult values based on resilience
if(res == "High") {
start_r <- c(0.6,1.5)} else if(res == "Medium") {
start_r <- c(0.2,0.8)} else if(res == "Low") {
start_r <- c(0.05,0.5)} else { # i.e. res== "Very low"
start_r <- c(0.015,0.1)}
}
# initial range of k values, assuming k will always be larger than max catch
# and max catch will never be smaller than a quarter of MSY
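# (Schaefer: MSY = r*k/4, hence k = 4*MSY/r; with max(ct) >= MSY/4 this gives k <= 16*max(ct)/r)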
start_k <- c(max(ct),16*max(ct)/start_r[1])
# initial biomass range from input file
if(is.na(stb_low)==F & is.na(stb_hi)==F) {
startbio <- c(stb_low,stb_hi)
} else {
# use low biomass at start as default
startbio <- c(0.1,0.5)
}
MinYear <- yr[which.min(ct)]
MaxYear <- yr[which.max(ct)]
# use year and biomass range for intermediate biomass from input file
if(is.na(intbio_low)==F & is.na(intbio_hi)==F) {
intyr <- intyr
intbio <- c(intbio_low,intbio_hi)
# else if year of minimum catch is at least 3 years away from StartYear and EndYear of series, use min catch
} else if((MinYear - StartYear) > 3 & (EndYear - MinYear) > 3 ) {
# assume that biomass range in year before minimum catch was 0.01 - 0.4
intyr <- MinYear-1
intbio <- c(0.01,0.4)
# else if year of max catch is at least 3 years away from StartYear and EndYear of series, use max catch
} else if((MaxYear - StartYear) > 3 & (EndYear - MaxYear) > 3 ) {
# assume that biomass range in year before maximum catch was 0.3 - 0.9
intyr <- MaxYear-1
intbio <- c(0.3,0.9)
} else {
# assume uninformative range 0-1 in mid-year
intyr <- as.integer(mean(c(StartYear, EndYear)))
intbio <- c(0,1) }
# end of intbio setting
# final biomass range from input file
if(is.na(endbio_low)==F & is.na(endbio_hi)==F) {
endbio <- c(endbio_low,endbio_hi)
} else {
# else use Catch/maxCatch to estimate final biomass
endbio <- if(ct[nyr]/max(ct) > 0.5) {c(0.4,0.8)} else {c(0.01,0.4)}
} # end of final biomass setting
#----------------------------------------------
# MC with Schaefer Function filtering
#----------------------------------------------
Schaefer <- function(ri, ki, startbio, intyr, intbio, endbio, sigR, pt) {
# if stock is not expected to crash within 3 years if last catch continues
if(FutureCrash == "No") {
yr.s <- c(yr,EndYear+1,EndYear+2,EndYear+3)
ct.s <- c(ct,ct[yr==EndYear],ct[yr==EndYear],ct[yr==EndYear])
nyr.s <- length(yr.s)
} else{
yr.s <- yr
ct.s <- ct
nyr.s <- nyr
}
# create vector for initial biomasses
startbt <-seq(from =startbio[1], to=startbio[2], by = (startbio[2]-startbio[1])/10)
# create vectors for viable r, k and bt
rv <- array(-1:-1,dim=c(length(ri)*length(startbt))) #initialize array with -1. The -1 remaining after the process will be removed
kv <- array(-1:-1,dim=c(length(ri)*length(startbt)))
btv <- matrix(data=NA, nrow = (length(ri)*length(startbt)), ncol = nyr+1)
intyr.i <- which(yr.s==intyr) # get index of intermediate year
#loop through r-k pairs
npoints = length(ri)
nstartb = length(startbt)
for(i in 1 : npoints) {
if (i%%1000==0)
cat(".")
# create empty vector for annual biomasses
bt <- vector()
# loop through range of relative start biomasses
for(j in startbt) {
# set initial biomass, including process error
bt[1]=j*ki[i]*exp(rnorm(1,0, sigR)) ## set biomass in first year
#loop through years in catch time series
for(t in 1:nyr.s) { # for all years in the time series
xt=rnorm(1,0, sigR) # set new random process error for every year
# calculate biomass as function of previous year's biomass plus surplus production minus catch
bt[t+1]=(bt[t]+ri[i]*bt[t]*(1-bt[t]/ki[i])-ct.s[t])*exp(xt)
# if biomass < 0.01 k or > 1.1 k, discard r-k pair
if(bt[t+1] < 0.01*ki[i] || bt[t+1] > 1.1*ki[i]) { break } # stop looping through years, go to next upper level
if ((t+1)==intyr.i && (bt[t+1]>(intbio[2]*ki[i]) || bt[t+1]<(intbio[1]*ki[i]))) { break } #intermediate year check
} # end of loop of years
# if last biomass falls outside the expected range, go to next r-k pair
if(t < nyr.s || bt[yr.s==EndYear] > (endbio[2]*ki[i]) || bt[yr.s==EndYear] < (endbio[1]*ki[i])) {
next } else {
# store r, k, and bt, plot point, then go to next startbt
rv[((i-1)*nstartb)+j] <- ri[i]
kv[((i-1)*nstartb)+j] <- ki[i]
btv[((i-1)*nstartb)+j,] <- bt[1:(nyr+1)]/ki[i] #substitute a row into the matrix, exclude FutureCrash years
if(pt==T) {points(x=ri[i], y=ki[i], pch=".", cex=2, col="black")
next }
}
} # end of loop of initial biomasses
} # end of loop of r-k pairs
rv=rv[rv!=-1]
kv=kv[kv!=-1]
btv=na.omit(btv) # drop rows still containing NA (r-k pairs that were not accepted)
cat("\n")
return(list(rv, kv,btv))
} # end of Schaefer function
#------------------------------------------------------------------
# Uniform sampling of the r-k space
#------------------------------------------------------------------
# get random set of r and k from log space distribution
ri1 = exp(runif(n, log(start_r[1]), log(start_r[2])))
ki1 = exp(runif(n, log(start_k[1]), log(start_k[2])))
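# sampling uniformly in log space spreads the candidate r-k pairs evenly across orders of magnitude,
# instead of concentrating them near the upper ends of the ranges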
#-----------------------------------------------------------------
# Plot data and progress
#-----------------------------------------------------------------
#windows(14,9)
par(mfcol=c(2,3))
# plot catch
plot(x=yr, y=ct, ylim=c(0,1.2*max(ct)), type ="l", bty="l", main=paste(stock,"catch"), xlab="Year",
ylab="Catch", lwd=2)
points(x=yr[which.max(ct)], y=max(ct), col="red", lwd=2)
points(x=yr[which.min(ct)], y=min(ct), col="red", lwd=2)
# plot r-k graph
plot(ri1, ki1, xlim = start_r, ylim = start_k, log="xy", xlab="r", ylab="k", main="Finding viable r-k", pch=".", cex=2, bty="l", col="lightgrey")
#1 - Call MC-Schaefer function to do a preliminary exploration of the space without prior information
cat(stock, ": First Monte Carlo filtering of r-k space with ",n," points\n")
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
rv.all <- append(rv.all,MCA[[1]])
kv.all <- append(kv.all,MCA[[2]])
btv.all <- rbind(btv.all,MCA[[3]])
#take viable r and k values
nviablepoints = length(rv.all)
cat("* Found ",nviablepoints," viable points from ",n," samples\n");
#if few points were found then resample and shrink the k log space
if (nviablepoints<=1000){
log.start_k.new <- log(start_k)
max_attempts = 3
current_attempts = 1
while (nviablepoints<=1000 && current_attempts<=max_attempts){
if(nviablepoints > 0) {
log.start_k.new[1] <- mean(c(log.start_k.new[1], min(log(kv.all))))
log.start_k.new[2] <- mean(c(log.start_k.new[2], max(log(kv.all)))) }
n.new=n*current_attempts #add more points
ri1 = exp(runif(n.new, log(start_r[1]), log(start_r[2])))
ki1 = exp(runif(n.new, log.start_k.new[1], log.start_k.new[2]))
cat("Shrinking k space: repeating Monte Carlo in the interval [",exp(log.start_k.new[1]),",",exp(log.start_k.new[2]),"]\n")
cat("Attempt ",current_attempts," of ",max_attempts," with ",n.new," points","\n")
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
rv.all <- append(rv.all,MCA[[1]])
kv.all <- append(kv.all,MCA[[2]])
btv.all <- rbind(btv.all,MCA[[3]])
nviablepoints = length(rv.all) #recalculate viable points
cat("* Found altogether",nviablepoints," viable points \n");
current_attempts=current_attempts+1 #increment the number of attempts
}
}
# If tip of viable r-k pairs is 'thin', do extra sampling there
gm.rv = exp(mean(log(rv.all)))
if(length(rv.all[rv.all > 0.9*start_r[2]]) < 10) {
l.sample.r <- (gm.rv + max(rv.all))/2
cat("Final sampling in the tip area above r =",l.sample.r,"\n")
log.start_k.new <- c(log(0.8*min(kv.all)),log(max(kv.all[rv.all > l.sample.r])))
ri1 = exp(runif(50000, log(l.sample.r), log(start_r[2])))
ki1 = exp(runif(50000, log.start_k.new[1], log.start_k.new[2]))
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
rv.all <- append(rv.all,MCA[[1]])
kv.all <- append(kv.all,MCA[[2]])
btv.all <- rbind(btv.all,MCA[[3]])
nviablepoints = length(rv.all) #recalculate viable points
cat("Found altogether", length(rv.all), "unique viable r-k pairs and biomass trajectories\n")
}
# ------------------------------------------------------------
# Bayesian analysis of catch & biomass with Schaefer model
# ------------------------------------------------------------
if(Btype == "observed" | Btype=="simulated") {
cat("Running Schaefer MCMC analysis....\n")
mcmc.burn <- as.integer(30000)
mcmc.chainLength <- as.integer(60000) # burn-in plus post-burn
mcmc.thin = 10 # to reduce autocorrelation
mcmc.chains = 3 # needs to be at least 2 for DIC
# Parameters to be returned by JAGS
jags.save.params=c('r','k','sigma.b', 'alpha', 'sigma.r') #
# JAGS model
Model = "model{
# to avoid crash due to 0 values
eps<-0.01
# set a quite narrow variation from the expected value
sigma.b <- 1/16
tau.b <- pow(sigma.b,-2)
Bm[1] <- log(alpha*k)
bio[1] ~ dlnorm(Bm[1],tau.b)
for (t in 2:nyr){
bio[t] ~ dlnorm(Bm[t],tau.b)
Bm[t] <- log(max(bio[t-1] + r*bio[t-1]*(1 - (bio[t-1])/k) - ct[t-1], eps))
}
# priors
alpha ~ dunif(0.01,1) # needed for fit of first biomass
#inverse cubic root relationship between the range of viable r and the size of the search space
inverseRangeFactor <- 1/((start_r[2]-start_r[1])^(1/3))
# give sigma some variability in the inverse relationship
sigma.r ~ dunif(0.001*inverseRangeFactor,0.02*inverseRangeFactor)
tau.r <- pow(sigma.r,-2)
rm <- log((start_r[1]+start_r[2])/2)
r ~ dlnorm(rm,tau.r)
# search in the k space from the center of the range. Allow high variability
km <- log((start_k[1]+start_k[2])/2)
tau.k <- pow(km,-2)
k ~ dlnorm(km,tau.k)
#end model
}"
# Write JAGS model to file
cat(Model, file="r2jags.bug")
### random seed
set.seed(runif(1,1,500)) # needed in JAGS
### run model
jags_outputs <- jags(data=c('ct','bio','nyr', 'start_r', 'start_k'),
working.directory=NULL, inits=NULL,
parameters.to.save= jags.save.params,
model.file="r2jags.bug", n.chains = mcmc.chains,
n.burnin = mcmc.burn, n.thin = mcmc.thin, n.iter = mcmc.chainLength,
refresh=mcmc.burn/20)
# ------------------------------------------------------
# Results from JAGS Schaefer
# ------------------------------------------------------
r_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$r))
k_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$k))
## sigma_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$sigma.b))
alpha_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$alpha))
## sigma.r_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$sigma.r))
mean.log.r.jags <- mean(log(r_out))
SD.log.r.jags <- sd(log(r_out))
lcl.log.r.jags <- mean.log.r.jags-1.96*SD.log.r.jags
ucl.log.r.jags <- mean.log.r.jags+1.96*SD.log.r.jags
gm.r.jags <- exp(mean.log.r.jags)
lcl.r.jags <- exp(lcl.log.r.jags)
ucl.r.jags <- exp(ucl.log.r.jags)
mean.log.k.jags <- mean(log(k_out))
SD.log.k.jags <- sd(log(k_out))
lcl.log.k.jags <- mean.log.k.jags-1.96*SD.log.k.jags
ucl.log.k.jags <- mean.log.k.jags+1.96*SD.log.k.jags
gm.k.jags <- exp(mean.log.k.jags)
lcl.k.jags <- exp(lcl.log.k.jags)
ucl.k.jags <- exp(ucl.log.k.jags)
mean.log.MSY.jags<- mean(log(r_out)+log(k_out)-log(4))
SD.log.MSY.jags <- sd(log(r_out)+log(k_out)-log(4))
gm.MSY.jags <- exp(mean.log.MSY.jags)
lcl.MSY.jags <- exp(mean.log.MSY.jags-1.96*SD.log.MSY.jags)
ucl.MSY.jags <- exp(mean.log.MSY.jags+1.96*SD.log.MSY.jags)
} # end of MCMC Schaefer loop
#------------------------------------
# get results from CMSY
#------------------------------------
# get estimate of most probable r as median of mid log.r-classes above cut-off
# get remaining viable log.r and log.k
rem.log.r <- log(rv.all[rv.all > gm.rv])
rem.log.k <- log(kv.all[rv.all>gm.rv])
# get vectors with numbers of r and mid values in about 25 classes
hist.log.r <- hist(x=rem.log.r, breaks=25, plot=F)
log.r.counts <- hist.log.r$counts
log.r.mids <- hist.log.r$mids
# get most probable log.r as mean of mids with counts > 0
log.r.est <- median(log.r.mids[which(log.r.counts > 0)])
lcl.log.r <- as.numeric(quantile(x=log.r.mids[which(log.r.counts > 0)], 0.025))
ucl.log.r <- as.numeric(quantile(x=log.r.mids[which(log.r.counts > 0)], 0.975))
r.est <- exp(log.r.est)
lcl.r.est <- exp(lcl.log.r)
ucl.r.est <- exp(ucl.log.r)
# do linear regression of log k ~ log r with slope fixed to -1 (from Schaefer)
reg <- lm(rem.log.k ~ 1 + offset(-1*rem.log.r))
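# offset() fixes the slope at -1, so only the intercept is estimated;
# int.reg therefore equals the mean of (rem.log.k + rem.log.r)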
int.reg <- as.numeric(reg[1])
sd.reg <- sd(resid(reg))
se.reg <- summary(reg)$coefficients[2]
# get estimate of log(k) from y where x = log.r.est
log.k.est <- int.reg + (-1) * log.r.est
# get estimates of CL of log.k.est from y +/- SD where x = lcl.log r or ucl.log.r
lcl.log.k <- int.reg + (-1) * ucl.log.r - sd.reg
ucl.log.k <- int.reg + (-1) * lcl.log.r + sd.reg
k.est <- exp(log.k.est)
lcl.k.est <- exp(lcl.log.k)
ucl.k.est <- exp(ucl.log.k)
# get MSY from remaining log r-k pairs
log.MSY.est <- mean(rem.log.r + rem.log.k - log(4))
sd.log.MSY.est <- sd(rem.log.r + rem.log.k - log(4))
lcl.log.MSY.est <- log.MSY.est - 1.96*sd.log.MSY.est
ucl.log.MSY.est <- log.MSY.est + 1.96*sd.log.MSY.est
MSY.est <- exp(log.MSY.est)
lcl.MSY.est <- exp(lcl.log.MSY.est)
ucl.MSY.est <- exp(ucl.log.MSY.est)
# get predicted biomass vectors as median and quantiles of trajectories
median.btv <- apply(btv.all,2, median)
lastyr.bio <- median.btv[length(median.btv)-1]
nextyr.bio <- median.btv[length(median.btv)]
lcl.btv <- apply(btv.all,2, quantile, probs=0.025)
q.btv <- apply(btv.all,2, quantile, probs=0.25)
ucl.btv <- apply(btv.all,2, quantile, probs=0.975)
lcl.lastyr.bio <- lcl.btv[length(lcl.btv)-1]
ucl.lastyr.bio <- ucl.btv[length(lcl.btv)-1]
lcl.nextyr.bio <- lcl.btv[length(lcl.btv)]
ucl.nextyr.bio <- ucl.btv[length(lcl.btv)]
# -----------------------------------------
# Plot results
# -----------------------------------------
# Analysis of viable r-k pairs
plot(x=rv.all, y=kv.all, xlim=start_r,
ylim=c(0.9*min(kv.all, ifelse(Btype == "observed",k_out,NA), na.rm=T), 1.1*max(kv.all)),
pch=16, col="grey",log="xy", bty="l",
xlab="r", ylab="k", main="Analysis of viable r-k")
abline(v=gm.rv, lty="dashed")
# plot points and best estimate from full Schaefer analysis
if(Btype == "observed"|Btype=="simulated") {
# plot r-k pairs from MCMC
points(x=r_out, y=k_out, pch=16,cex=0.5)
# plot best r-k pair from MCMC
points(x=gm.r.jags, y=gm.k.jags, pch=19, col="green")
lines(x=c(lcl.r.jags, ucl.r.jags),y=c(gm.k.jags,gm.k.jags), col="green")
lines(x=c(gm.r.jags,gm.r.jags),y=c(lcl.k.jags, ucl.k.jags), col="green")
}
# if data are from simulation, plot true r and k
if(Btype=="simulated") {
l.stock <- nchar(stock) # get length of sim stock name
r.char <- substr(stock,l.stock-1,l.stock) # get last character of sim stock name
r.sim <- NA # initialize vector for r used in simulation
if(r.char=="_H") {r.sim=1; lcl.r.sim=0.8; ucl.r.sim=1.25} else
if(r.char=="_M") {r.sim=0.5;lcl.r.sim=0.4;ucl.r.sim=0.62} else
if(r.char=="_L") {r.sim=0.25;lcl.r.sim=0.2;ucl.r.sim=0.31} else {r.sim=0.05;lcl.r.sim=0.04;ucl.r.sim=0.062}
# plot true r-k point with error bars
points(x=r.sim, y=1000, pch=19, col="red")
# add +/- 20% error bars
lines(x=c(lcl.r.sim,ucl.r.sim), y=c(1000,1000), col="red")
lines(x=c(r.sim,r.sim), y=c(800,1250), col="red")
}
# plot blue dot for proposed r-k, with 95% CL lines
points(x=r.est, y=k.est, pch=19, col="blue")
lines(x=c(lcl.r.est, ucl.r.est),y=c(k.est,k.est), col="blue")
lines(x=c(r.est,r.est),y=c(lcl.k.est, ucl.k.est), col="blue")
# plot biomass graph
# determine k to use for red line in b/k plot
if(Btype=="simulated") {k2use <- 1000} else
if(Btype == "observed") {k2use <- gm.k.jags} else {k2use <- k.est}
# determine height of y-axis in plot
max.y <- max(c(bio/k2use,ucl.btv,0.6,startbio[2], intbio[2],endbio[2]),na.rm=T)
plot(x=yr,y=median.btv[1:nyr], lwd=2, xlab="Year", ylab="Relative biomass b/k", type="l",
ylim=c(0,max.y), bty="l", main=paste("Pred. biomass vs ", Btype,sep=""))
lines(x=yr, y=lcl.btv[1:nyr],type="l")
lines(x=yr, y=ucl.btv[1:nyr],type="l")
points(x=EndYear,y=q.btv[yr==EndYear], col="purple", cex=1.5, lwd=2)
abline(h=0.5, lty="dashed")
abline(h=0.25, lty="dotted")
lines(x=c(yr[1],yr[1]), y=startbio, col="blue")
lines(x=c(intyr,intyr), y=intbio, col="blue")
lines(x=c(max(yr),max(yr)), y=endbio, col="blue")
# if observed biomass is available, plot red biomass line
if(Btype == "observed"|Btype=="simulated") {
lines(x=yr, y=bio/k2use,type="l", col="red", lwd=1)
}
# if CPUE data are available, scale to predicted biomass range, plot red biomass line
if(Btype == "CPUE") {
par(new=T) # prepares for new plot on top of previous
plot(x=yr, y=bio, type="l", col="red", lwd=1,
ann=F,axes=F,ylim=c(0,1.2*max(bio, na.rm=T))) # forces this plot on top of previous one
axis(4, col="red", col.axis="red")
}
# plot yield and biomass against equilibrium surplus parabola
max.y <-max(c(ct/MSY.est,ifelse(Btype=="observed"|Btype=="simulated",ct/gm.MSY.jags,NA),1.2),na.rm=T)
# plot parabola
x=seq(from=0,to=2,by=0.001)
y=4*x-(2*x)^2
plot(x=x, y=y, xlim=c(0,1), ylim=c(0,max.y), type="l", bty="l",xlab="Relative biomass b/k",
ylab="Catch / MSY", main="Equilibrium curve")
# plot catch against CMSY biomass estimates
points(x=median.btv[1:nyr], y=ct/MSY.est, pch=16, col="grey")
points(x=q.btv[yr==EndYear],y=ct[yr==EndYear]/MSY.est, col="purple", cex=1.5, lwd=2)
# plot catch against observed biomass or CPUE
if(Btype == "observed"|Btype=="simulated") {
points(x=bio/k2use, y=ct/gm.MSY.jags, pch=16, cex=0.5)
}
# plot exploitation rate u against u.msy
# get u derived from predicted CMSY biomass
u.CMSY <- ct/(median.btv[1:nyr]*k.est)
u.msy.CMSY <- 1-exp(-r.est/2) # Fmsy from CMSY expressed as exploitation rate
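# (Schaefer: Fmsy = r/2; the instantaneous rate is converted to an annual exploitation rate via u = 1 - exp(-F))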
# get u from observed or simulated biomass
if(Btype == "observed"|Btype=="simulated") {
u.bio <- ct/bio
u.msy.bio <- 1-exp(-gm.r.jags/2)
}
# get u from CPUE
if(Btype == "CPUE") {
q=max(median.btv[1:nyr][is.na(bio)==F],na.rm=T)*k.est/max(bio,na.rm=T)
u.CPUE <- ct/(q*bio)
}
# determine upper bound of Y-axis
max.y <- max(c(1.5, 1.2*u.CMSY/u.msy.CMSY,ct[yr==EndYear]/(q.btv[yr==EndYear]*k.est)/u.msy.CMSY,
ifelse(Btype=="observed"|Btype=="simulated",max(u.bio[is.na(u.bio)==F]/u.msy.bio),0),
na.rm=T))
# plot u from CMSY
plot(x=yr,y=u.CMSY/u.msy.CMSY, type="l", bty="l", ylim=c(0,max.y), xlab="Year",
ylab="u / u_msy", main="Exploitation rate")
abline(h=1, lty="dashed")
points(x=EndYear,y=ct[yr==EndYear]/(q.btv[yr==EndYear]*k.est)/u.msy.CMSY, col="purple", cex=1.5, lwd=2)
# plot u from biomass
if(Btype == "observed"|Btype=="simulated") lines(x=yr, y=u.bio/u.msy.bio, col="red")
# plot u from CPUE
if(Btype == "CPUE") {
par(new=T) # prepares for new plot on top of previous
plot(x=yr, y=u.CPUE, type="l", col="red", ylim=c(0, 1.2*max(u.CPUE,na.rm=T)),ann=F,axes=F)
axis(4, col="red", col.axis="red")
}
if(batch.mode == TRUE) {dev.off()} # close plot window
# ------------------------------------------
# print input and results to screen
cat("---------------------------------------\n")
cat("Species:", cinfo$ScientificName[cinfo$stock==stock], "\n")
cat("Name and region:", cinfo$EnglishName[cinfo$stock==stock], ",", cinfo$Name[cinfo$stock==stock], "\n")
cat("Stock:",stock,"\n")
cat("Catch data used from years", min(yr),"-", max(yr), "\n")
cat("Prior initial relative biomass =", startbio[1], "-", startbio[2], "\n")
cat("Prior intermediate rel. biomass=", intbio[1], "-", intbio[2], "in year", intyr, "\n")
cat("Prior final relative biomass =", endbio[1], "-", endbio[2], "\n")
cat("If current catches continue, is the stock likely to crash within 3 years?",FutureCrash,"\n")
cat("Prior range for r =", format(start_r[1],digits=2), "-", format(start_r[2],digits=2),
", prior range for k =", start_k[1], "-", start_k[2],"\n")
# if data are simulated, print true r-k
if(filename_1=="SimCatch.csv") {
cat("True r =", r.sim, "(because input data were simulated with Schaefer model)\n")
cat("True k = 1000 \n")
cat("True MSY =", 1000*r.sim/4,"\n")
cat("True biomass in last year =",bio[length(bio)],"or",bio[length(bio)]/1000,"k \n")
cat("True mean catch / MSY ratio =", mean(ct)/(1000*r.sim/4),"\n")
}
# print results from full Schaefer if available
if(Btype == "observed"|Btype=="simulated") {
cat("Results from Bayesian Schaefer model using catch & biomass (",Btype,")\n")
cat("MSY =", gm.MSY.jags,", 95% CL =", lcl.MSY.jags, "-", ucl.MSY.jags,"\n")
cat("Mean catch / MSY =", mean(ct)/gm.MSY.jags,"\n")
if(Btype != "CPUE") {
cat("r =", gm.r.jags,", 95% CL =", lcl.r.jags, "-", ucl.r.jags,"\n")
cat("k =", gm.k.jags,", 95% CL =", lcl.k.jags, "-", ucl.k.jags,"\n")
}
}
# results of CMSY analysis
cat("Results of CMSY analysis \n")
cat("Altogether", nviablepoints,"unique viable r-k pairs were found \n")
cat(nviablepoints-length(rem.log.r),"r-k pairs above the initial geometric mean of r =", gm.rv, "were analysed\n")
cat("r =", r.est,", 95% CL =", lcl.r.est, "-", ucl.r.est,"\n")
cat("k =", k.est,", 95% CL =", lcl.k.est, "-", ucl.k.est,"\n")
cat("MSY =", MSY.est,", 95% CL =", lcl.MSY.est, "-", ucl.MSY.est,"\n")
cat("Predicted biomass in last year =", lastyr.bio, "2.5th perc =", lcl.lastyr.bio,
"97.5th perc =", ucl.lastyr.bio,"\n")
cat("Predicted biomass in next year =", nextyr.bio, "2.5th perc =", lcl.nextyr.bio,
"97.5th perc =", ucl.nextyr.bio,"\n")
cat("----------------------------------------------------------\n")
## Write some results into outfile
if(write.output == TRUE) {
# write data into csv file
output = data.frame(cinfo$ScientificName[cinfo$stock==stock], stock, StartYear, EndYear, mean(ct)*1000,
ifelse(Btype=="observed"|Btype=="simulated",bio[length(bio)],NA), # last biomass on record
ifelse(Btype == "observed"|Btype=="simulated",gm.MSY.jags,NA), # full Schaefer
ifelse(Btype == "observed"|Btype=="simulated",lcl.MSY.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",ucl.MSY.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",gm.r.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",lcl.r.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",ucl.r.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",gm.k.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",lcl.k.jags,NA),
ifelse(Btype == "observed"|Btype=="simulated",ucl.k.jags,NA),
r.est, lcl.r.est, ucl.r.est, # CMSY r
k.est, lcl.k.est, ucl.k.est, # CMSY k
MSY.est, lcl.MSY.est, ucl.MSY.est, # CMSY r
lastyr.bio, lcl.lastyr.bio, ucl.lastyr.bio, # last year bio
nextyr.bio, lcl.nextyr.bio, ucl.nextyr.bio)# last year + 1 bio
write.table(output, file=outfile, append = T, sep = ",",
dec = ".", row.names = FALSE, col.names = FALSE)
# write some text into text outfile.txt
cat("Species:", cinfo$ScientificName[cinfo$stock==stock], "\n",
"Name:", cinfo$EnglishName[cinfo$stock==stock], "\n",
"Region:", cinfo$Name[cinfo$stock==stock], "\n",
"Stock:",stock,"\n",
"Catch data used from years", min(yr),"-", max(yr),", biomass =", Btype, "\n",
"Prior initial relative biomass =", startbio[1], "-", startbio[2], "\n",
"Prior intermediate rel. biomass=", intbio[1], "-", intbio[2], "in year", intyr, "\n",
"Prior final relative biomass =", endbio[1], "-", endbio[2], "\n",
"Future crash with current catches?", FutureCrash, "\n",
"Prior range for r =", format(start_r[1],digits=2), "-", format(start_r[2],digits=2),
", prior range for k =", start_k[1], "-", start_k[2],"\n",
file=outfile.txt,append=T)
if(filename_1=="SimCatch.csv") {
cat(" True r =", r.sim, "(because input data were simulated with Schaefer model)\n",
"True k = 1000, true MSY =", 1000*r.sim/4,"\n",
"True biomass in last year =",bio[length(bio)],"or",bio[length(bio)]/1000,"k \n",
"True mean catch / MSY ratio =", mean(ct)/(1000*r.sim/4),"\n",
file=outfile.txt,append=T)
}
if(Btype == "observed"|Btype=="simulated") {
cat(" Results from Bayesian Schaefer model using catch & biomass \n",
"r =", gm.r.jags,", 95% CL =", lcl.r.jags, "-", ucl.r.jags,"\n",
"k =", gm.k.jags,", 95% CL =", lcl.k.jags, "-", ucl.k.jags,"\n",
"MSY =", gm.MSY.jags,", 95% CL =", lcl.MSY.jags, "-", ucl.MSY.jags,"\n",
"Mean catch / MSY =", mean(ct)/gm.MSY.jags,"\n",
file=outfile.txt,append=T)
}
cat(" Results of CMSY analysis with altogether", nviablepoints,"unique viable r-k pairs \n",
nviablepoints-length(rem.log.r),"r-k pairs above the initial geometric mean of r =", gm.rv, "were analysed\n",
"r =", r.est,", 95% CL =", lcl.r.est, "-", ucl.r.est,"\n",
"k =", k.est,", 95% CL =", lcl.k.est, "-", ucl.k.est,"\n",
"MSY =", MSY.est,", 95% CL =", lcl.MSY.est, "-", ucl.MSY.est,"\n",
"Predicted biomass last year b/k =", lastyr.bio, "2.5th perc b/k =", lcl.lastyr.bio,
"97.5th perc b/k =", ucl.lastyr.bio,"\n",
"Precautionary 25th percentile b/k =",q.btv[yr==EndYear],"\n",
"----------------------------------------------------------\n",
file=outfile.txt,append=T)
}
} # end of stocks loop

@@ -0,0 +1,119 @@
##--------------------------------------------------------
## CMSY analysis with estimation of total biomass, including Bayesian Schaefer
## written by Rainer Froese with support from Gianpaolo Coro in 2013-2014
## This version adjusts biomass to average biomass over the year
## It also contains the FutureCrash option to improve prediction of final biomass
## Version 21 adds the purple point to indicate the 25th percentile of final biomass
## Version 22 accepts that no biomass or CPUE data are available
##--------------------------------------------------------
library(R2jags) # Interface with JAGS
library(coda)
#-----------------------------------------
# Some general settings
#-----------------------------------------
# set.seed(999) # use for comparing results between runs
rm(list=ls(all=TRUE)) # clear previous variables etc
options(digits=3) # displays all numbers with three significant digits as default
graphics.off() # close graphics windows from previous sessions
#-----------------------------------------
# General settings for the analysis
#-----------------------------------------
sigR <- 0.02 # overall process error; 0.05 works reasonable for simulations, 0.02 for real data; 0 if deterministic model
n <- 10000 # initial number of r-k pairs
batch.mode <- T # set to TRUE to suppress graphs
write.output <- T # set to true if table of output is wanted
FutureCrash <- "No"
#-----------------------------------------
# Start output to screen
#-----------------------------------------
cat("-------------------------------------------\n")
cat("Catch-MSY Analysis,", date(),"\n")
cat("-------------------------------------------\n")
#------------------------------------------
# Read data and assign to vectors
#------------------------------------------
# filename_1 <- "AllStocks_Catch4.csv"
# filename_2 <- "AllStocks_ID4.csv"
# filename_1 <- "SimCatch.csv"
# filename_2 <- "SimSpec.csv"
# filename_2 <- "SimSpecWrongS.csv"
# filename_2 <- "SimSpecWrongI.csv"
# filename_2 <- "SimSpecWrongF.csv"
# filename_2 <- "SimSpecWrongH.csv"
# filename_2 <- "SimSpecWrongL.csv"
# filename_1 <- "FishDataLim.csv"
# filename_2 <- "FishDataLimSpec.csv"
filename_1 <- "WKLIFE4Stocks.csv"
filename_2 <- "WKLIFE4ID.csv"
outfile<-"outfile"
outfile.txt <- "outputfile.txt"
cdat <- read.csv(filename_1, header=T, dec=".", stringsAsFactors = FALSE)
cinfo <- read.csv(filename_2, header=T, dec=".", stringsAsFactors = FALSE)
cat("Files", filename_1, ",", filename_2, "read successfully","\n")
# Stocks with total biomass data and catch data from StartYear to EndYear
# stocks <- sort(as.character(cinfo$stock)) # All stocks
stocks<-"HLH_M07"
# select one stock after the other
for(stock in stocks) {
# assign data from cinfo to vectors
res <- as.character(cinfo$Resilience[cinfo$stock==stock])
StartYear <- as.numeric(cinfo$StartYear[cinfo$stock==stock])
EndYear <- as.numeric(cinfo$EndYear[cinfo$stock==stock])
r_low <- as.numeric(cinfo$r_low[cinfo$stock==stock])
r_hi <- as.numeric(cinfo$r_hi[cinfo$stock==stock])
stb_low <- as.numeric(cinfo$stb_low[cinfo$stock==stock])
stb_hi <- as.numeric(cinfo$stb_hi[cinfo$stock==stock])
intyr <- as.numeric(cinfo$intyr[cinfo$stock==stock])
intbio_low <- as.numeric(cinfo$intbio_low[cinfo$stock==stock])
intbio_hi <- as.numeric(cinfo$intbio_hi[cinfo$stock==stock])
endbio_low <- as.numeric(cinfo$endbio_low[cinfo$stock==stock])
endbio_hi <- as.numeric(cinfo$endbio_hi[cinfo$stock==stock])
Btype <- as.character(cinfo$Btype[cinfo$stock==stock])
FutureCrash <- as.character(cinfo$FutureCrash[cinfo$stock==stock])
comment <- as.character(cinfo$comment[cinfo$stock==stock])
# extract data on stock
yr <- as.numeric(cdat$yr[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])
ct <- as.numeric(cdat$ct[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that catch is given in tonnes, transforms to '000 tonnes
if(Btype=="observed" | Btype=="CPUE" | Btype=="simulated") {
bt <- as.numeric(cdat$TB[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that biomass is in tonnes, transforms to '000 tonnes
} else {bt <- NA}
nyr <- length(yr) # number of years in the time series
cat("->---------------------------------------
Species: NA
Name and region: NA , NA
Stock: HLH_M07
Catch data used from years 1 - 50
Prior initial relative biomass = 0.5 - 0.9
Prior intermediate rel. biomass= 0.01 - 0.4 in year 25
Prior final relative biomass = 0.4 - 0.8
If current catches continue, is the stock likely to crash within 3 years? No
Prior range for r = 0.2 - 0.8 , prior range for k = 125 - 9965
Results from Bayesian Schaefer model using catch & biomass ( simulated )
MSY = 91.7 , 95% CL = 83.9 - 100
Mean catch / MSY = 0.882
r = 0.425 , 95% CL = 0.374 - 0.483
k = 863 , 95% CL = 783 - 951
Results of CMSY analysis
Altogether 2055 unique viable r-k pairs were found
1142 r-k pairs above the initial geometric mean of r = 0.343 were analysed
r = 0.522 , 95% CL = 0.349 - 0.782
k = 683 , 95% CL = 438 - 1067
MSY = 89.2 , 95% CL = 82.2 - 96.7
Predicted biomass in last year = 0.676 2.5th perc = 0.435 97.5th perc = 0.768
Predicted biomass in next year = 0.673 2.5th perc = 0.433 97.5th perc = 0.758
----------------------------------------------------------
",file=outfile.txt,append=T)
}

@@ -0,0 +1,17 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">1</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

@@ -0,0 +1,530 @@
#### R and JAGS code for estimating LWR-parameters from previous studies
#### Meant for updating the ESTIMATE table in FishBase
#### Created by Rainer Froese in March 2013, including JAGS models by James Thorston
#### Modified in June 2013 to include subfamilies
rm(list=ls(all=TRUE)) # remove previous variables and data
options(digits=3) # 3 significant digits as default
library(R2jags) # Interface with JAGS
runif(1) # sets random seed
#### Read in data
DataFile = "RF_LWR2.csv" # RF_LWR4 was extracted from FishBase in June 2013
Data = read.csv(DataFile, header=TRUE)
cat("Start", date(), "\n")
cat("Data file =", DataFile, "\n")
# Get unique, sorted list of Families
Fam.All <- sort(unique(as.character(Data$Family)))
Families <- Fam.All[Fam.All== "Acanthuridae" | Fam.All == "Achiridae"]
OutFile = "LWR_Test1.csv"
JAGSFILE = "dmnorm_0.bug"
# Get unique, sorted list of body shapes
Bshape <- sort(unique(as.character(Data$BodyShapeI)))
#------------------------------------------
# Functions
#------------------------------------------
#---------------------------------------------------------
# Function to get the priors for the respective body shape
#---------------------------------------------------------
Get.BS.pr <- function(BS) {
### Assignment of priors based on available body shape information
# priors derived from 5150 LWR studies in FishBase 02/2013
if (BS == "eel-like") { # eel-like prior for log(a) and b
prior_mean_log10a = -2.99
prior_sd_log10a = 0.175
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.06
prior_sd_b = 0.0896
prior_tau_b = 1/prior_sd_b^2
} else
if (BS == "elongated") { # elongate prior for log(a) and b
prior_mean_log10a = -2.41
prior_sd_log10a = 0.171
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.12
prior_sd_b = 0.09
prior_tau_b = 1/prior_sd_b^2
} else
if (BS == "fusiform / normal") { # fusiform / normal prior for log(a) and b
prior_mean_log10a = -1.95
prior_sd_log10a = 0.173
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.04
prior_sd_b = 0.0857
prior_tau_b = 1/prior_sd_b^2
} else
if (BS == "short and / or deep") { # short and / or deep prior for log(a) and b
prior_mean_log10a = -1.7
prior_sd_log10a = 0.175
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.01
prior_sd_b = 0.0905
prior_tau_b = 1/prior_sd_b^2
} else
# priors across all shapes, used for missing or other BS
{
prior_mean_log10a = -2.0
prior_sd_log10a = 0.313
prior_tau_log10a = 1/prior_sd_log10a^2
prior_mean_b = 3.04
prior_sd_b = 0.119
prior_tau_b = 1/prior_sd_b^2
}
# Priors for measurement error (= sigma) based on 5150 studies
# given here as shape mu and rate r, for gamma distribution
SD_rObs_log10a = 6520
SD_muObs_log10a = 25076
SD_rObs_b = 6808
SD_muObs_b = 37001
# Priors for between species variability (= sigma) based on 5150 studies for 1821 species
SD_rGS_log10a = 1372
SD_muGS_log10a = 7933
SD_rGS_b = 572
SD_muGS_b = 6498
prior.list <- list(mean_log10a=prior_mean_log10a, sd_log10a=prior_sd_log10a,
tau_log10a=prior_tau_log10a, mean_b=prior_mean_b, sd_b=prior_sd_b,
tau_b=prior_tau_b, SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b, SD_rGS_log10a=SD_rGS_log10a,
SD_muGS_log10a=SD_muGS_log10a, SD_rGS_b=SD_rGS_b, SD_muGS_b=SD_muGS_b)
return(prior.list)
}
#--------------------------------------------------------------------
# Function to do a Bayesian analysis including LWR from relatives
#--------------------------------------------------------------------
SpecRelLWR <- function(a, b, wts, GenusSpecies, Nspecies, prior_mean_b, prior_tau_b,
prior_mean_log10a, prior_tau_log10a, SD_rObs_log10a, SD_muObs_log10a,
SD_rObs_b, SD_muObs_b, SD_rGS_log10a, SD_muGS_log10a,
SD_rGS_b, SD_muGS_b){
### Define JAGS model
Model = "
model {
#### Process model -- effects of taxonomy
# given the likelihood distributions and the priors,
# create normal posterior distributions for log10a, b,
# and for the process error (=between species variability sigmaGS)
abTrue[1] ~ dnorm(prior_mean_log10a,prior_tau_log10a)
abTrue[2] ~ dnorm(prior_mean_b,prior_tau_b)
sigmaGSlog10a ~ dgamma( SD_rGS_log10a, SD_muGS_log10a)
sigmaGSb ~ dgamma( SD_rGS_b, SD_muGS_b)
# given the posterior distributions and the process errors,
# establish for every species the expected within-species
# parameter distributions; no correlation roGS between species
roGS <- 0
tauGenusSpecies[1] <- pow(sigmaGSlog10a,-2)
tauGenusSpecies[2] <- pow(sigmaGSb,-2)
for(k in 1:Nspecies){
abGenusSpecies[k,1] ~ dnorm(abTrue[1],tauGenusSpecies[1])
abGenusSpecies[k,2] ~ dnorm(abTrue[2],tauGenusSpecies[2])
}
### Observation model
## Errors
# given the data and the priors, establish distributions
# for the observation errors sigmaObs
sigmaObslog10a ~ dgamma( SD_rObs_log10a, SD_muObs_log10a)
sigmaObsb ~ dgamma( SD_rObs_b, SD_muObs_b)
# create inverse covariance matrix, with negative parameter correlation roObs
roObs ~ dunif(-0.99,0)
CovObs[1,1] <- pow(sigmaObslog10a,2)
CovObs[2,2] <- pow(sigmaObsb,2)
CovObs[1,2] <- roObs * sigmaObslog10a * sigmaObsb
CovObs[2,1] <- CovObs[1,2]
TauObs[1:2,1:2] <- inverse(CovObs[1:2,1:2])
## likelihood
# given the data, the priors and the covariance,
# create multivariate likelihood distributions for log10(a) and b
for(i in 1:N){
TauObsI[i,1:2,1:2] <- TauObs[1:2,1:2] * pow(Weights[i],2) # weighted precision
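# scaling the precision by Weights^2 is equivalent to dividing the observation SD by the study weight,
# so higher-scored LWR studies contribute tighter likelihoods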
ab[i,1:2] ~ dmnorm(abGenusSpecies[GenusSpecies[i],1:2],TauObsI[i,1:2,1:2])
}
}
"
# Write JAGS model
cat(Model, file=JAGSFILE)
# JAGS settings
Nchains = 3 # number of MCMC chains to be used in JAGS
Nburnin = 1e4 # number of burn-in iterations, to be discarded; 1e4 = 10000 iterations for burn-in
Niter = 3e4 # number of iterations after burn-in; 3e4 = 30000 iterations
Nthin = 1e1 # subset of iterations to be used for analysis; 1e1 = every 10th iteration
# Run JAGS: define data to be passed on in DataJags;
# determine parameters to be returned in Param2Save;
# call JAGS with function Jags()
DataJags = list(ab=cbind(log10(a),b), N=length(a), Weights=wts, Nspecies=Nspecies, GenusSpecies=GenusSpecies,
prior_mean_b=prior_mean_b, prior_tau_b=prior_tau_b,
prior_mean_log10a=prior_mean_log10a, prior_tau_log10a=prior_tau_log10a,
SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b,
SD_rGS_log10a=SD_rGS_log10a, SD_muGS_log10a=SD_muGS_log10a,
SD_rGS_b=SD_rGS_b, SD_muGS_b=SD_muGS_b)
Params2Save = c("abTrue","abGenusSpecies","sigmaGSlog10a","sigmaGSb","sigmaObslog10a","sigmaObsb","roObs")
Jags <- jags(inits=NULL, model.file=JAGSFILE, working.directory=NULL, data=DataJags,
parameters.to.save=Params2Save, n.chains=Nchains, n.thin=Nthin, n.iter=Niter, n.burnin=Nburnin)
Jags$BUGSoutput # contains the results from the JAGS run
# Analyze output for the relatives
abTrue <- Jags$BUGSoutput$sims.list$abTrue
R_mean_log10a <- mean(abTrue[,1]) # true mean of log10(a)
R_sd_log10a <- sd(abTrue[,1]) # true SE of log10(a)
R_mean_b <- mean(abTrue[,2]) # true mean of b
R_sd_b <- sd(abTrue[,2]) # true SE of b
# Analyze output for the target species
abGenusSpecies <- Jags$BUGSoutput$sims.list$abGenusSpecies
mean_log10a <- mean(abGenusSpecies[,1,1]) # true mean of log10(a) for the first species= target species
sd_log10a <- sd(abGenusSpecies[,1,1]) # true SE of log10(a)
mean_b <- mean(abGenusSpecies[,1,2]) # true mean of b
sd_b <- sd(abGenusSpecies[,1,2]) # true SE of b
mean_sigma_log10a <- mean(Jags$BUGSoutput$sims.list$sigmaObslog10a) # measurement error of log10(a)
sd_sigma_log10a <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigmaObslog10a), 2, sd)
mean_sigma_b <- mean(Jags$BUGSoutput$sims.list$sigmaObsb) # measurement error of b
sd_sigma_b <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigmaObsb), 2, sd)
ro_ab <- mean(Jags$BUGSoutput$sims.list$roObs) # measurement correlation of log10(a),b
out.list <- list(N=length(a), mean_log10a=mean_log10a, sd_log10a=sd_log10a, mean_b=mean_b, sd_b=sd_b,
R_mean_log10a=R_mean_log10a, R_sd_log10a=R_sd_log10a, R_mean_b=R_mean_b, R_sd_b=R_sd_b)
return(out.list)
}
#-----------------------------------------------------------------------------
# Function to do a Bayesian LWR analysis with studies for target species only
#-----------------------------------------------------------------------------
SpecLWR <- function(a, b, wts, prior_mean_b, prior_tau_b,
prior_mean_log10a, prior_tau_log10a, SD_rObs_log10a, SD_muObs_log10a,
SD_rObs_b, SD_muObs_b, SD_rGS_log10a, SD_muGS_log10a,
SD_rGS_b, SD_muGS_b){
# Define JAGS model
Model = "
model {
sigma1 ~ dgamma( SD_rObs_log10a, SD_muObs_log10a) # posterior distribution for measurement error in log10a
sigma2 ~ dgamma( SD_rObs_b, SD_muObs_b) # posterior distribution for measurement error in log10a
ro ~ dunif(-0.99,0) # uniform prior for negative correlation between log10a and b
abTrue[1] ~ dnorm(prior_mean_log10a,prior_tau_log10a) # normal posterior distribution for log10a
abTrue[2] ~ dnorm(prior_mean_b,prior_tau_b) # normal posterior distribution for b
CovObs[1,1] <- pow(sigma1,2)
CovObs[2,2] <- pow(sigma2,2)
CovObs[1,2] <- ro * sigma1 * sigma2
CovObs[2,1] <- CovObs[1,2]
TauObs[1:2,1:2] <- inverse(CovObs[1:2,1:2]) # create inverse covariance matrix
for(i in 1:N){
TauObsI[i,1:2,1:2] <- TauObs[1:2,1:2] * pow(Weights[i],2) # converts prior SD into prior weighted precision
# given the data, the priors and the covariance, create multivariate normal posteriors for log(a) and b
ab[i,1:2] ~ dmnorm(abTrue[1:2],TauObsI[i,1:2,1:2])
}
}
"
# Write JAGS model
cat(Model, file=JAGSFILE)
# JAGS settings
Nchains = 3 # number of MCMC chains to be used in JAGS
Nburnin = 1e4 # number of burn-in runs, to be discarded; 10000 iterations for burn-in
Niter = 3e4 # number of iterations after burn-in; 3e4 = 30000 iterations
Nthin = 1e1 # subset of iterations to be used for analysis; 1e1 = every 10th iteration
# Run JAGS: define data to be passed on in DataJags; determine parameters to be returned in Param2Save; call JAGS with function Jags()
DataJags = list(ab=cbind(log10(a),b), N=length(a), Weights=wts, prior_mean_b=prior_mean_b,
prior_tau_b=prior_tau_b, prior_mean_log10a=prior_mean_log10a, prior_tau_log10a=prior_tau_log10a,
SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b)
Params2Save = c("abTrue","sigma1","sigma2","ro")
Jags <- jags(inits=NULL, model.file=JAGSFILE, working.directory=NULL, data=DataJags, parameters.to.save=Params2Save, n.chains=Nchains, n.thin=Nthin, n.iter=Niter, n.burnin=Nburnin)
Jags$BUGSoutput # contains the results from the JAGS run
# Analyze output
abTrue <- Jags$BUGSoutput$sims.list$abTrue
mean_log10a <- mean(abTrue[,1]) # true mean of log10(a)
sd_log10a <- sd(abTrue[,1]) # true SE of log10(a)
mean_b <- mean(abTrue[,2]) # true mean of b
sd_b <- sd(abTrue[,2]) # true SE of b
mean_sigma_log10a <- mean(Jags$BUGSoutput$sims.list$sigma1) # measurement error of log10(a)
sd_sigma_log10a <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigma1), 2, sd)
mean_sigma_b <- mean(Jags$BUGSoutput$sims.list$sigma2) # measurement error of b
sd_sigma_b <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigma2), 2, sd)
ro_ab <- mean(Jags$BUGSoutput$sims.list$ro) # measurement correlation of log10(a),b
out.list <- list(N=length(a), mean_log10a=mean_log10a, sd_log10a=sd_log10a, mean_b=mean_b, sd_b=sd_b)
return(out.list)
} # End of Functions section
#--------------------------------
# Analysis by Family
#--------------------------------
# Do LWR analysis by Family, Subfamily and Body shape, depending on available LWR studies
# for(Fam in "Acanthuridae") {
for(Fam in Families) {
Subfamilies <- sort(unique(Data$Subfamily[Data$Family==Fam]))
for(SF in Subfamilies) {
for(BS in Bshape) {
# get species (SpecCodes) in this Subfamily and with this body shape
SpecCode.SF.BS <- unique(Data$SpecCode[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS])
# if there are species with this body shape
if(length(SpecCode.SF.BS) > 0) {
# get priors for this body shape
prior <- Get.BS.pr(BS)
# get LWR data for this body shape
b_raw <- Data$b[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS]
cat("\n")
cat("Family =", Fam, ", Subfamily =", SF, ", Body shape =", BS, ", Species =", length(SpecCode.SF.BS), ", LWR =",
length(b_raw[is.na(b_raw)==F]), "\n")
# if no LWR studies exist for this body shape, assign the respective priors to all species
if(length(b_raw[is.na(b_raw)==F])==0) {
# assign priors to species with no LWR in this Subfamily with this body shape
cat("Assigning overall body shape prior to", length(SpecCode.SF.BS), " species \n")
for(SpC in SpecCode.SF.BS) {
out.prior <- data.frame(Fam, SF, BS, SpC, 0, prior$mean_log10a, prior$sd_log10a, prior$mean_b, prior$sd_b,
paste("all LWR estimates for this BS"))
write.table(out.prior, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
}
} else {
# Update priors for this body shape using existing LWR studies
# get LWR data for this Subfamily and body shape
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# add a first dummy record with prior LWR and low score = 0.3, as pseudo target species
# Name of dummy target species is Dum1 dum1
TargetSpec = paste("Dum1", "dum1")
wts <- c(0.3, wts)
a <- c(10^(prior$mean_log10a), a)
b <- c(prior$mean_b, b)
GenSpec <- c(TargetSpec, GenSpec)
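# the low-weight dummy record acts as a weak pseudo-observation at the body-shape prior mean,
# so the posterior for the pseudo target species is driven almost entirely by the real LWR studies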
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for pseudo target species with Subfamily members
# The resulting R_mean_log10a, R_sd_log10a, R_mean_b, R_sd_b will be used for species without LWR
cat("Updating Subfamily-Bodyshape prior using", Nspecies-1, "species with LWR studies \n")
prior.SFam.BS <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies, prior_mean_b=prior$mean_b,
prior_tau_b=prior$tau_b, prior_mean_log10a=prior$mean_log10a,
prior_tau_log10a=prior$tau_log10a, SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
#------------------------------------------------------------------------------------------
# if there are Genera with >= 5 species with LWR, update body shape priors for these Genera
#------------------------------------------------------------------------------------------
Genera <- unique(as.character(Data$Genus[Keep]))
# create empty list of lists for storage of generic priors
prior.Gen.BS <- rep(list(list()),length(Genera)) # create a list of empty lists
names(prior.Gen.BS) <- Genera # name the list elements according to the Genera
for(Genus in Genera){
# check if Genus contains >= 5 species with LWR data
if(length(unique(Data$SpecCode[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F &
Data$Score>0 & Data$Genus==Genus]))>=5) {
# run Subfamily analysis with only data for this genus
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0 &
Data$Genus==Genus)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# add a first dummy record with prior LWR and low score = 0.3, as pseudo target species
# Name of dummy target species is Dum1 dum1
TargetSpec = paste("Dum1", "dum1")
wts <- c(0.3, wts)
a <- c(10^(prior$mean_log10a), a)
b <- c(prior$mean_b, b)
GenSpec <- c(TargetSpec, GenSpec)
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for pseudo target species with Genus members
# R_mean_log10a, R_sd_log10a, R_mean_b, R_sd_b will be used for species without LWR
cat("Updating prior for Genus =", Genus, ", with", Nspecies -1, "LWR Species \n")
prior.Gen.BS[[Genus]] <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
prior_mean_b=prior.SFam.BS$R_mean_b,
prior_tau_b=1/prior.SFam.BS$R_sd_b^2,
prior_mean_log10a=prior.SFam.BS$R_mean_log10a,
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2,
SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
}
}
# new Subfamily-BS priors have been generated
# for some genera, new Genus-BS priors have been generated
# ---------------------------------------------------------------------
# Loop through all species in this Subfamily-BS; assign LWR as appropriate
# ---------------------------------------------------------------------
for(SpC in SpecCode.SF.BS) {
Genus <- as.character(unique(Data$Genus[Data$SpecCode==SpC]))
Species <- as.character(unique(Data$Species[Data$SpecCode==SpC]))
TargetSpec = paste(Genus, Species)
LWR <- length(Data$b[Data$SpecCode==SpC & is.na(Data$b)==F & Data$Score>0])
LWRGenspec <- length(unique(Data$SpecCode[Data$BodyShapeI==BS & is.na(Data$b)==F &
Data$Score>0 & Data$Genus==Genus]))
LWRSFamspec <- length(unique(Data$SpecCode[Data$BodyShapeI==BS & is.na(Data$b)==F &
Data$Score>0 & Data$Family==Fam & Data$Subfamily==SF]))
#---------------------------------------------------------
# >= 5 LWR in target species, run single species analysis
if(LWR >= 5) {
# Run analysis with data only for this species
Keep <- which(Data$SpecCode==SpC & is.na(Data$b)==F & Data$Score>0)
wts = Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a = Data$a[Keep]
b = Data$b[Keep]
# determine priors to be used
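# Note: the *_tau_* arguments are precisions (1/variance), hence the 1/sd^2 conversions below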
if(LWRGenspec >= 5) {
prior_mean_b=prior.Gen.BS[[Genus]]$R_mean_b
prior_tau_b=1/prior.Gen.BS[[Genus]]$R_sd_b^2
prior_mean_log10a=prior.Gen.BS[[Genus]]$R_mean_log10a
prior_tau_log10a=1/prior.Gen.BS[[Genus]]$R_sd_log10a^2
} else
if (LWRSFamspec > 0) {
prior_mean_b=prior.SFam.BS$R_mean_b
prior_tau_b=1/prior.SFam.BS$R_sd_b^2
prior_mean_log10a=prior.SFam.BS$R_mean_log10a
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2
} else {
prior_mean_b=prior$mean_b
prior_tau_b=prior$tau_b
prior_mean_log10a=prior$mean_log10a
prior_tau_log10a=prior$tau_log10a
}
cat("Running single species analysis for", TargetSpec, "LWR =", LWR, ", LWR species in Genus=",LWRGenspec,"\n" )
# call function for single species analysis
post <- SpecLWR(a, b, wts, prior_mean_b=prior_mean_b,
prior_tau_b=prior_tau_b, prior_mean_log10a=prior_mean_log10a,
prior_tau_log10a=prior_tau_log10a, SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3), format(post$mean_b, digits=3), format(post$sd_b, digits=3),
paste("LWR estimates for this species"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else
#--------------------------------------------------------
# 1-4 LWR in target species and >= 5 LWR species in Genus
# run hierarchical analysis for genus members, with Subfamily-BS prior
if(LWR >= 1 & LWRGenspec >=5) {
# run Subfamily analysis with only data for this genus
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0 &
Data$Genus==Genus)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for target species with Genus members
cat("Running analysis with congeners for", TargetSpec, ", LWR =", LWR,", LWR species in Genus =", LWRGenspec,"\n")
post <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
prior_mean_b=prior.SFam.BS$R_mean_b,
prior_tau_b=1/prior.SFam.BS$R_sd_b^2,
prior_mean_log10a=prior.SFam.BS$R_mean_log10a,
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2,
SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3), format(post$mean_b, digits=3), format(post$sd_b, digits=3),
paste("LWR estimates for species & Genus-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else
#-------------------------------------------------------
# 1-4 LWR in target species and < 5 LWR species in Genus
# run hierarchical analysis for Subfamily members, with bodyshape prior
if(LWR >= 1 & LWRSFamspec > 1) {
# run Subfamily analysis
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0)
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
a <- Data$a[Keep]
b <- Data$b[Keep]
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
# Relabel GenSpec so that TargetSpec = level 1
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
Nspecies = nlevels(GenusSpecies) # number of species
# run Bayesian analysis for target species with Subfamily members
cat("Running analysis with Subfamily members for", TargetSpec, ", LWR =", LWR,", LWR species in Subfamily-BS =",
LWRSFamspec, "\n")
post <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
prior_mean_b=prior$mean_b,
prior_tau_b=prior$tau_b,
prior_mean_log10a=prior$mean_log10a,
prior_tau_log10a=prior$tau_log10a,
SD_rObs_log10a=prior$SD_rObs_log10a,
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
SD_muGS_b=prior$SD_muGS_b)
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3),
format(post$mean_b, digits=3), format(post$sd_b, digits=3),
paste("LWR estimates for species & Subfamily-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else
#--------------------------------------------------
# assign Genus-BS priors to target species
if(LWRGenspec >= 5) {
cat("Assign Genus-BS prior for", TargetSpec, "\n")
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(prior.Gen.BS[[Genus]]$mean_log10a, digits=3),
format(prior.Gen.BS[[Genus]]$sd_log10a, digits=3),
format(prior.Gen.BS[[Genus]]$mean_b, digits=3), format(prior.Gen.BS[[Genus]]$sd_b, digits=3),
paste("LWR estimates for this Genus-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
} else {
# -----------------------------------------------
# assign Subfamily-BS priors to target species
cat("Assign Subfamily-BS prior for", TargetSpec,"\n")
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(prior.SFam.BS$mean_log10a, digits=3), format(prior.SFam.BS$sd_log10a, digits=3),
format(prior.SFam.BS$mean_b, digits=3), format(prior.SFam.BS$sd_b, digits=3), paste("LWR estimates for this Subfamily-BS"))
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
}
} # end of species loop for this Subfamily and body shape
} # end of section dealing with Subfamily - body shapes that contain LWR estimates
} # end of section that deals with Subfamily - body shapes that contain species
} # end of body shape section
} # end of Subfamily section
} # end of Family section
cat("End", date(),"\n")

Binary file not shown.

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<jardesc>
<jar path="EcologicalEngineExecutor/AQUAMAPS_SUITABLE/aquamapsnode.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/aquamapsjarcreator.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java&lt;org.gcube.dataanalysis.executor.generators"/>
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java&lt;org.gcube.dataanalysis.executor.nodes.algorithms"/>
</selectedElements>
</jardesc>

32
cfg/ALog.properties Normal file
View File

@ -0,0 +1,32 @@
#### Use two appenders, one to log to console, another to log to a file
log4j.rootCategory= R
#### First appender writes to console
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
#log4j.appender.stdout.layout.ConversionPattern=%m%n
#log4j.appender.stdout.File=Analysis.log
#### Second appender writes to a file
log4j.logger.AnalysisLogger=trace,stdout, R
log4j.appender.R=org.apache.log4j.RollingFileAppender
#log4j.appender.R=org.apache.log4j.AsyncAppender
#log4j.appender.R.Threshold=INFO
log4j.appender.R.File=Analysis.log
log4j.appender.R.MaxFileSize=50000KB
log4j.appender.R.MaxBackupIndex=2
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
#log4j.appender.R.layout.ConversionPattern=%m%n
#### Third appender writes to a file
log4j.logger.org.hibernate=H
#log4j.appender.H=org.apache.log4j.RollingFileAppender
log4j.appender.H=org.apache.log4j.AsyncAppender
#log4j.appender.H.File=HibernateLog.log
#log4j.appender.H.MaxFileSize=1024KB
#log4j.appender.H.MaxBackupIndex=2
log4j.appender.H.layout=org.apache.log4j.PatternLayout
log4j.appender.H.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n

View File

View File

@ -0,0 +1,18 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<!-- <property name="dialect">org.hibernatespatial.postgis.PostgisDialect</property>-->
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">10</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

View File

@ -0,0 +1,9 @@
AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable
AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative2050
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable2050
AQUAMAPS_NATIVE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNN
AQUAMAPS_SUITABLE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNSuitable
FEED_FORWARD_A_N_N_DISTRIBUTION=org.gcube.dataanalysis.ecoengine.spatialdistributions.FeedForwardNeuralNetworkDistribution
LWR=org.gcube.dataanalysis.executor.nodes.algorithms.LWR
CMSY=org.gcube.dataanalysis.executor.nodes.algorithms.CMSY

View File

@ -0,0 +1,3 @@
DBSCAN=org.gcube.dataanalysis.ecoengine.clustering.DBScan
KMEANS=org.gcube.dataanalysis.ecoengine.clustering.KMeans
XMEANS=org.gcube.dataanalysis.ecoengine.clustering.XMeansWrapper

View File

@ -0,0 +1,3 @@
DISCREPANCY_ANALYSIS=org.gcube.dataanalysis.ecoengine.evaluation.DiscrepancyAnalysis
QUALITY_ANALYSIS=org.gcube.dataanalysis.ecoengine.evaluation.DistributionQualityAnalysis
HRS=org.gcube.dataanalysis.ecoengine.evaluation.HabitatRepresentativeness

View File

@ -0,0 +1,6 @@
LOCAL_WITH_DATABASE=org.gcube.dataanalysis.ecoengine.processing.LocalSplitGenerator
SIMPLE_LOCAL=org.gcube.dataanalysis.ecoengine.processing.LocalSimpleSplitGenerator
D4SCIENCE=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
#OCCURRENCES_MERGER=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
#OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing

439
cfg/interpolateTacsat.r Normal file
View File

@ -0,0 +1,439 @@
cat("Retrieving Input Parameters\n")
inputFile<-'tacsat.csv'
outputFile<-'tacsat_interpolated.csv'
require(data.table)
print(Sys.time())
memory.size(max = TRUE)
memory.limit(size = 4000)
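# Note (assumption): the interpolation settings used at the bottom of this script
# (interval, margin, res, method, fm, distscale, sigline, st, headingAdjustment,
# fast, npoints, equalDist) are not defined here; they are expected to be injected
# by the caller (e.g. the SGVM_INTERPOLATION wrapper) before this script is run.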
interCubicHermiteSpline <- function(spltx,spltCon,res,params,headingAdjustment){
#Formula of Cubic Hermite Spline
t <- seq(0,1,length.out=res)
F00 <- 2*t^3 -3*t^2 + 1
F10 <- t^3-2*t^2+t
F01 <- -2*t^3+3*t^2
F11 <- t^3-t^2
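# F00..F11 are the standard cubic Hermite basis polynomials (h00, h10, h01, h11) on t in [0,1];
# F00/F01 weight the start/end positions, F10/F11 the start/end headings (tangents)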
#Making tacsat dataset ready
spltx[spltCon[,1],"SI_HE"][which(is.na(spltx[spltCon[,1],"SI_HE"]))] <- 0
spltx[spltCon[,2],"SI_HE"][which(is.na(spltx[spltCon[,2],"SI_HE"]))] <- 0
#Heading at begin point in degrees
Hx0 <- sin(spltx[spltCon[,1],"SI_HE"]/(180/pi))
Hy0 <- cos(spltx[spltCon[,1],"SI_HE"]/(180/pi))
#Heading at end point in degrees
Hx1 <- sin(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi))
Hy1 <- cos(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi))
#Start and end positions
Mx0 <- spltx[spltCon[,1],"SI_LONG"]
Mx1 <- spltx[spltCon[,2],"SI_LONG"]
My0 <- spltx[spltCon[,1],"SI_LATI"]
My1 <- spltx[spltCon[,2],"SI_LATI"]
#Corrected for longitude-latitude effect
Hx0 <- Hx0 * params$fm * spltx[spltCon[,1],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hx1 <- Hx1 * params$fm * spltx[spltCon[,2],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hy0 <- Hy0 * params$fm * lonLatRatio(spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,1],"SI_LATI"]) * spltx[spltCon[,1],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
Hy1 <- Hy1 * params$fm * lonLatRatio(spltx[spltCon[,2],"SI_LONG"],spltx[spltCon[,2],"SI_LATI"]) * spltx[spltCon[,2],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
#Get the interpolation
fx <- outer(F00,Mx0,"*")+outer(F10,Hx0,"*")+outer(F01,Mx1,"*")+outer(F11,Hx1,"*")
fy <- outer(F00,My0,"*")+outer(F10,Hy0,"*")+outer(F01,My1,"*")+outer(F11,Hy1,"*")
#Create output format
intsx <- lapply(as.list(1:nrow(spltCon)),function(x){
matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2,
dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))})
return(intsx)}
rbindTacsat <- function(set1,set2){
cln1 <- colnames(set1)
cln2 <- colnames(set2)
if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets")
idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE)
idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE)
if(length(idx1)>0){
for(i in idx1) set2 <- cbind(set2,NA)
colnames(set2) <- c(cln2,cln1[idx1])}
if(length(idx2)>0){
for(i in idx2) set1 <- cbind(set1,NA)
colnames(set1) <- c(cln1,cln2[idx2])}
cln1 <- colnames(set1)
cln2 <- colnames(set2)
mtch <- pmatch(cln1,cln2)
if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names")
set3 <- rbind(set1,set2[,cln2[mtch]])
return(set3)}
bearing <- function(lon,lat,lonRef,latRef){
x1 <- lon
y1 <- lat
x2 <- lonRef
y2 <- latRef
y <- sin((x2-x1)*pi/180) * cos(y2*pi/180)
x <- cos(y1*pi/180) * sin(y2*pi/180) - sin(y1*pi/180) * cos(y2*pi/180) * cos((x2-x1)*pi/180)
bearing <- atan2(y,x)*180/pi
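# atan2 returns degrees in (-180, 180]; shift by 360 and take the modulo to map onto [0, 360)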
bearing <- (bearing + 360)%%360
return(bearing)}
`distance` <-
function(lon,lat,lonRef,latRef){
pd <- pi/180
a1<- sin(((latRef-lat)*pd)/2)
a2<- cos(lat*pd)
a3<- cos(latRef*pd)
a4<- sin(((lonRef-lon)*pd)/2)
a <- a1*a1+a2*a3*a4*a4
c <- 2*atan2(sqrt(a),sqrt(1-a));
return(6371*c)}
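# distance() above implements the haversine great-circle distance; 6371 is the Earth radius in km, so results are in km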
distanceInterpolation <- function(interpolation){
res <- unlist(lapply(interpolation,function(x){
dims <- dim(x)
res <- distance(x[3:dims[1],1],x[3:dims[1],2],x[2:(dims[1]-1),1],x[2:(dims[1]-1),2])
return(sum(res,na.rm=TRUE))}))
return(res)}
equalDistance <- function(interpolation,res=10){
#Calculate distance of all interpolations at the same time
totDist <- distanceInterpolation(interpolation)
#Get dimensions of interpolations
lngInt <- lapply(interpolation,dim)
#Warn if resolution of equal distance is too high compared to original resolution of interpolation
if(min(unlist(lngInt)[seq(1,length(totDist),2)],na.rm=TRUE) < 9*res) warning("Number of intermediate points in the interpolation might be too small for the equal distance points chosen")
#Get distance steps to get equal distance
eqStep <- totDist/(res-1)
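# e.g. a 9 km interpolated track with res=10 gives eqStep = 1 km between successive points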
#Get x-y values of all interpolations
intidx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,byrow=TRUE)
#Do the calculation
result <- lapply(interpolation,function(ind){
i <- which(intidx[,1] == ind[1,1] & intidx[,2] == ind[1,2])
idx <- apply(abs(outer(
cumsum(distance(ind[3:lngInt[[i]][1],1],ind[3:lngInt[[i]][1],2],ind[2:(lngInt[[i]][1]-1),1],ind[2:(lngInt[[i]][1]-1),2])),
seq(eqStep[i],totDist[i],eqStep[i]),
"-")),
2,which.min)+1
idx <- c(1,idx)
return(ind[c(1,idx+1),])})
#Return the equal distance interpolated set in the same format as the interpolated dataset (as a list)
return(result)}
interStraightLine <- function(spltx,spltCon,res){
fx <- mapply(seq,spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,2],"SI_LONG"],length.out=res)
fy <- mapply(seq,spltx[spltCon[,1],"SI_LATI"],spltx[spltCon[,2],"SI_LATI"],length.out=res)
#Create output format
intsx <- lapply(as.list(1:nrow(spltCon)),function(x){
matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2,
dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))})
return(intsx)}
interpolation2Tacsat <- function(interpolation,tacsat,npoints=10,equalDist=TRUE){
# This function takes the list of tracks output by interpolateTacsat and converts them back to tacsat format.
# The npoints argument is the optional number of points between each 'real' position.
tacsat <- sortTacsat(tacsat)
if(!"HL_ID" %in% colnames(tacsat)) tacsat$HL_ID <- 1:nrow(tacsat)
if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
if(equalDist){
interpolationEQ <- equalDistance(interpolation,npoints) #Divide points equally along interpolated track (default is 10).
} else {
interpolationEQ <- lapply(interpolation,function(x){idx <- round(seq(2,nrow(x),length.out=npoints)); return(x[c(1,idx),])})
}
res <- lapply(interpolationEQ,function(x){
idx <- unlist(x[1,1:2]@.Data); x <- data.frame(x)
colnames(x) <- c("SI_LONG","SI_LATI")
cls <- which(apply(tacsat[c(idx),],2,function(y){return(length(unique(y)))})==1)
for(i in cls){
x <- cbind(x,rep(tacsat[idx[1],i],nrow(x)));
colnames(x) <- c(colnames(x)[1:(ncol(x)-1)],colnames(tacsat)[i])
}
if(!"VE_COU" %in% colnames(x)) x$VE_COU <- rep(tacsat$VE_COU[idx[1]],nrow(x))
if(!"VE_REF" %in% colnames(x)) x$VE_REF <- rep(tacsat$VE_REF[idx[1]],nrow(x))
if(!"FT_REF" %in% colnames(x)) x$FT_REF <- rep(tacsat$FT_REF[idx[1]],nrow(x))
x$SI_DATIM <- tacsat$SI_DATIM[idx[1]]
x$SI_DATIM[-c(1:2)] <- as.POSIXct(cumsum(rep(difftime(tacsat$SI_DATIM[idx[2]],tacsat$SI_DATIM[idx[1]],units="secs")/(nrow(x)-2),nrow(x)-2))+tacsat$SI_DATIM[idx[1]],tz="GMT",format = "%d/%m/%Y %H:%M")
x$SI_DATE <- format(x$SI_DATIM,format="%d/%m/%Y")
timeNotation <- ifelse(length(unlist(strsplit(tacsat$SI_TIME[1],":")))>2,"secs","mins")
if(timeNotation == "secs") x$SI_TIME <- format(x$SI_DATIM,format="%H:%M:%S")
if(timeNotation == "mins") x$SI_TIME <- format(x$SI_DATIM,format="%H:%M")
x$SI_SP <- mean(c(tacsat$SI_SP[idx[1]],tacsat$SI_SP[idx[2]]),na.rm=TRUE)
x$SI_HE <- NA;
x$SI_HE[-c(1,nrow(x))] <- bearing(x$SI_LONG[2:(nrow(x)-1)],x$SI_LATI[2:(nrow(x)-1)],x$SI_LONG[3:nrow(x)],x$SI_LATI[3:nrow(x)])
x$HL_ID <- tacsat$HL_ID[idx[1]]
return(x[-c(1,2,nrow(x)),])})
#interpolationTot <- do.call(rbind,res)
interpolationTot <- res[[1]][,which(duplicated(colnames(res[[1]]))==FALSE)]
if(length(res)>1){
for(i in 2:length(res)){
if(nrow(res[[i]])>0)
interpolationTot <- rbindTacsat(interpolationTot,res[[i]][,which(duplicated(colnames(res[[i]]))==FALSE)])
}
}
#tacsatInt <- rbind(interpolationTot,tacsat[,colnames(interpolationTot)])
tacsatInt <- rbindTacsat(tacsat,interpolationTot)
tacsatInt <- sortTacsat(tacsatInt)
return(tacsatInt)
}
`sortTacsat` <-
function(dat){
require(doBy)
if(!"SI_DATIM" %in% colnames(dat)) dat$SI_DATIM <- as.POSIXct(paste(dat$SI_DATE, dat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
#Sort the tacsat data first by ship, then by date
if("VE_REF" %in% colnames(dat)) dat <- orderBy(~VE_REF+SI_DATIM,data=dat)
if("OB_REF" %in% colnames(dat)) dat <- orderBy(~OB_REF+SI_DATIM,data=dat)
return(dat)}
`lonLatRatio` <-
function(x1,lat){
#Based on the Haversine formula
#At the position, the y-position remains the same, hence, cos(lat)*cos(lat) instead of cos(lat) * cos(y2)
a <- cos(lat*pi/180)*cos(lat*pi/180)*sin((0.1*pi/180)/2)*sin((0.1*pi/180)/2);
c <- 2*atan2(sqrt(a),sqrt(1-a));
R <- 6371;
dx1 <- R*c
return(c(dx1/11.12))}
`an` <-
function(x){return(as.numeric(x))}
`findEndTacsat` <-
function(tacsat
,startTacsat #Starting point of VMS
,interval #Specify in minutes, NULL means use all points
,margin #Specify the margin in minutes it might deviate from the interval time, in minutes
){
VMS <- tacsat
if(!"SI_DATIM" %in% colnames(VMS)) VMS$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
startVMS <- startTacsat
clStartVMS <- startVMS #Total VMS list starting point instead of subset use
iShip <- VMS$VE_REF[startVMS]
VMS. <- subset(VMS,VE_REF==iShip)
startVMS <- which(VMS$VE_REF[startVMS] == VMS.$VE_REF & VMS$SI_DATIM[startVMS] == VMS.$SI_DATIM)
if(clStartVMS != dim(VMS)[1]){
if(VMS$VE_REF[clStartVMS] != VMS$VE_REF[clStartVMS+1]){
#End of dataset reached
endDataSet <- 1
endVMS <- NA
} else {
#Calculate the difference in time between the starting VMS point and its succeeding points
diffTime <- difftime(VMS.$SI_DATIM[(startVMS+1):dim(VMS.)[1]],VMS.$SI_DATIM[startVMS],units=c("mins"))
if(length(which(diffTime >= (interval-margin) & diffTime <= (interval+margin)))==0){
warning("No succeeding point found, no interpolation possible")
endVMS <- NA
#Check if end of dataset has been reached
ifelse(all((diffTime < (interval-margin))==TRUE),endDataSet <- 1,endDataSet <- 0)
} else {
res <- which(diffTime >= (interval-margin) & diffTime <= (interval+margin))
if(length(res)>1){
res2 <- which.min(abs(interval-an(diffTime[res])))
endVMS <- startVMS + res[res2]
endDataSet <- 0
} else {
endVMS <- startVMS + res
endDataSet <- 0
}
}
#Build-in check
if(is.na(endVMS)==FALSE){
if(!an(difftime(VMS.$SI_DATIM[endVMS],VMS.$SI_DATIM[startVMS],units=c("mins"))) %in% seq((interval-margin),(interval+margin),1)) stop("found endVMS point not within interval range")
endVMS <- clStartVMS + (endVMS - startVMS)
}
}
} else { endDataSet <- 1; endVMS <- NA}
return(c(endVMS,endDataSet))}
`interpolateTacsat` <-
function(tacsat #VMS datapoints
,interval=120 #Specify in minutes, NULL means use all points
,margin=12 #Specify the margin in minutes that the interval might deviate in a search for the next point
,res=100 #Resolution of interpolation method (default = 100)
,method="cHs" #Specify the method to be used: Straight line (SL) or cubic Hermite spline (cHs)
,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) #Specify the four parameters: fm, distscale, sigline, st (speed thresholds)
,headingAdjustment=0
,fast=FALSE){
if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
#Start interpolating the data
if(!method %in% c("cHs","SL")) stop("method selected that does not exist")
#-------------------------------------------------------------------------------
#Fast method or not
#-------------------------------------------------------------------------------
if(fast){
#Interpolation only by vessel, so split tacsat up
tacsat$ID <- 1:nrow(tacsat)
splitTa <- split(tacsat,tacsat$VE_REF)
spltTaCon <- lapply(splitTa,function(spltx){
#Calculate the time difference between every pair of records
dftimex <- outer(spltx$SI_DATIM,spltx$SI_DATIM,difftime,units="mins")
iStep <- 1
connect <- list()
counter <- 1
#Loop over all possible combinations and store if a connection can be made
while(iStep <= nrow(spltx)){
endp <- which(dftimex[,iStep] >= (interval - margin) & dftimex[,iStep] <= (interval + margin))
if(length(endp)>0){
if(length(endp)>1) endp <- endp[which.min(abs(interval - dftimex[endp,iStep]))][1]
connect[[counter]] <- c(iStep,endp)
counter <- counter + 1
iStep <- endp
} else { iStep <- iStep + 1}
}
#Return matrix of connections
return(do.call(rbind,connect))})
if(method=="cHs") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){
return(interCubicHermiteSpline(spltx=splitTa[[y]],spltCon=spltTaCon[[y]],res,params,headingAdjustment))}),recursive=FALSE)
if(method=="SL") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){
return(interStraightLine(splitTa[[y]],spltTaCon[[y]],res))}),recursive=FALSE)
} else {
#Initiate returning result object
returnInterpolations <- list()
#Start iterating over succeeding points
for(iStep in 1:(dim(tacsat)[1]-1)){
if(iStep == 1){
iSuccess <- 0
endDataSet <- 0
startVMS <- 1
ship <- tacsat$VE_REF[startVMS]
} else {
if(is.na(endVMS)==TRUE) endVMS <- startVMS + 1
startVMS <- endVMS
#-Check if the end of the dataset is reached
if(endDataSet == 1 & rev(unique(tacsat$VE_REF))[1] != ship){
startVMS <- which(tacsat$VE_REF == unique(tacsat$VE_REF)[which(unique(tacsat$VE_REF)==ship)+1])[1]
ship <- tacsat$VE_REF[startVMS]
endDataSet<- 0
}
if(endDataSet == 1 & rev(unique(tacsat$VE_REF))[1] == ship) endDataSet <- 2 #Final end of dataset
}
#if end of dataset is not reached, try to find succeeding point
if(endDataSet != 2){
result <- findEndTacsat(tacsat,startVMS,interval,margin)
endVMS <- result[1]
endDataSet <- result[2]
if(is.na(endVMS)==TRUE) int <- 0 #No interpolation possible
if(is.na(endVMS)==FALSE) int <- 1 #Interpolation possible
#Interpolate according to the Cubic Hermite Spline method
if(method == "cHs" & int == 1){
#Define the cHs formula
F00 <- numeric()
F10 <- numeric()
F01 <- numeric()
F11 <- numeric()
i <- 0
t <- seq(0,1,length.out=res)
F00 <- 2*t^3 -3*t^2 + 1
F10 <- t^3-2*t^2+t
F01 <- -2*t^3+3*t^2
F11 <- t^3-t^2
if (is.na(tacsat[startVMS,"SI_HE"])) tacsat[startVMS,"SI_HE"] <- 0
if (is.na(tacsat[endVMS, "SI_HE"])) tacsat[endVMS, "SI_HE"] <- 0
#Heading at begin point in degrees
Hx0 <- sin(tacsat[startVMS,"SI_HE"]/(180/pi))
Hy0 <- cos(tacsat[startVMS,"SI_HE"]/(180/pi))
#Heading at end point in degrees
Hx1 <- sin(tacsat[endVMS-headingAdjustment,"SI_HE"]/(180/pi))
Hy1 <- cos(tacsat[endVMS-headingAdjustment,"SI_HE"]/(180/pi))
Mx0 <- tacsat[startVMS, "SI_LONG"]
Mx1 <- tacsat[endVMS, "SI_LONG"]
My0 <- tacsat[startVMS, "SI_LATI"]
My1 <- tacsat[endVMS, "SI_LATI"]
#Corrected for longitude-latitude effect
Hx0 <- Hx0 * params$fm * tacsat[startVMS,"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hx1 <- Hx1 * params$fm * tacsat[endVMS,"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
Hy0 <- Hy0 * params$fm * lonLatRatio(tacsat[c(startVMS,endVMS),"SI_LONG"],tacsat[c(startVMS,endVMS),"SI_LATI"])[1] * tacsat[startVMS,"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
Hy1 <- Hy1 * params$fm * lonLatRatio(tacsat[c(startVMS,endVMS),"SI_LONG"],tacsat[c(startVMS,endVMS),"SI_LATI"])[2] * tacsat[endVMS,"SI_SP"]/((params$st[2]-params$st[1]) /2+params$st[1])
#Finalizing the interpolation based on cHs
fx <- numeric()
fy <- numeric()
fx <- F00*Mx0+F10*Hx0+F01*Mx1+F11*Hx1
fy <- F00*My0+F10*Hy0+F01*My1+F11*Hy1
#Add one to list of successful interpolations
iSuccess <- iSuccess + 1
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))
}
#Interpolate according to a straight line
if(method == "SL" & int == 1){
fx <- seq(tacsat$SI_LONG[startVMS],tacsat$SI_LONG[endVMS],length.out=res)
fy <- seq(tacsat$SI_LATI[startVMS],tacsat$SI_LATI[endVMS],length.out=res)
#Add one to list of successful interpolations
iSuccess <- iSuccess + 1
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))
}
}
}
}
return(returnInterpolations)}
cat("Loading Table\n")
tacsatX <-read.table(inputFile,sep=",",header=T)
cat("Adjusting Columns Types\n")
tacsatX<-transform(tacsatX, VE_COU= as.character(VE_COU), VE_REF= as.character(VE_REF), SI_LATI= as.numeric(SI_LATI), SI_LONG= as.numeric(SI_LONG), SI_DATE= as.character(SI_DATE),SI_TIME= as.character(SI_TIME),SI_SP= as.numeric(SI_SP),SI_HE= as.numeric(SI_HE))
tacsatX$SI_DATIM=NULL
cat("Sorting dataset\n")
tacsatS <- sortTacsat(tacsatX)
tacsatCut<-tacsatS
tacsatCut <- tacsatS[1:1000,]
cat("Interpolating\n")
interpolation <- interpolateTacsat(tacsatCut,interval=interval,margin=margin,res=res, method=method,params=list(fm=fm,distscale=distscale,sigline=sigline,st=st),headingAdjustment=headingAdjustment,fast=fast)
cat("Reconstructing Dataset\n")
tacsatInt <- interpolation2Tacsat(interpolation=interpolation,tacsat=tacsatCut,npoints=npoints,equalDist=equalDist)
tacsatInt <- sortTacsat(tacsatInt)
cat("Writing output file\n")
write.csv(tacsatInt, outputFile, row.names=T)
print(Sys.time())
cat("All Done.\n")

1
cfg/modelers.properties Normal file
View File

@ -0,0 +1 @@
HSPEN_MODELER=org.gcube.dataanalysis.ecoengine.modeling.SimpleModeler

4
cfg/models.properties Normal file
View File

@ -0,0 +1,4 @@
HSPEN=org.gcube.dataanalysis.ecoengine.models.ModelHSPEN
AQUAMAPSNN=org.gcube.dataanalysis.ecoengine.models.ModelAquamapsNN
FEED_FORWARD_ANN=org.gcube.dataanalysis.ecoengine.models.FeedForwardNN
FEED_FORWARD_ANN_FILE=org.gcube.dataanalysis.ecoengine.models.testing.FeedForwardNNFile

View File

@ -0,0 +1,11 @@
AQUAMAPS_SUITABLE=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitableNode
AQUAMAPS_NATIVE=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsNativeNode
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsNative2050Node
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitable2050Node
OCCURRENCES_MERGER=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceMergingNode
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceIntersectionNode
OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceSubtractionNode
LWR=org.gcube.dataanalysis.executor.nodes.algorithms.LWR
BIONYM=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymFlexibleWorkflowTransducer
BIONYM_BIODIV=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymBiodiv
CMSY=org.gcube.dataanalysis.executor.nodes.algorithms.CMSY

2620
cfg/operators.xml Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,16 @@
BIOCLIMATE_HSPEC=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPECTransducer
BIOCLIMATE_HCAF=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHCAFTransducer
BIOCLIMATE_HSPEN=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPENTransducer
HCAF_INTERPOLATION=org.gcube.dataanalysis.ecoengine.transducers.InterpolationTransducer
HCAF_FILTER=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.HcafFilter
HSPEN_FILTER=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.HspenFilter
ABSENCE_CELLS_FROM_AQUAMAPS=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.MarineAbsencePointsFromAquamapsDistribution
PRESENCE_CELLS_GENERATION=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.MarinePresencePoints
OCCURRENCES_MERGER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsMerger
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsIntersector
OCCURRENCES_MARINE_TERRESTRIAL=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsInSeaOnEarth
OCCURRENCES_DUPLICATES_DELETER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsDuplicatesDeleter
OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsSubtraction
BIONYM=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymTransducer
BIONYM_LOCAL=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymLocalTransducer
SGVM_INTERPOLATION=org.gcube.dataanalysis.executor.rscripts.SGVMS_Interpolation

View File

@ -0,0 +1,13 @@
ANOMALIES_DETECTION=DBSCAN,KMEANS,XMEANS
CLASSIFICATION=FEED_FORWARD_A_N_N_DISTRIBUTION
CLIMATE=BIOCLIMATE_HSPEC,BIOCLIMATE_HCAF,BIOCLIMATE_HSPEN,HCAF_INTERPOLATION
CORRELATION_ANALYSIS=HRS
DATA_CLUSTERING=DBSCAN,KMEANS,XMEANS
FILTERING=HCAF_FILTER,HSPEN_FILTER
FUNCTION_SIMULATION=FEED_FORWARD_A_N_N_DISTRIBUTION
OCCURRENCES=ABSENCE_CELLS_FROM_AQUAMAPS,PRESENCE_CELLS_GENERATION,OCCURRENCES_MERGER,OCCURRENCES_INTERSECTOR,OCCURRENCES_MARINE_TERRESTRIAL,OCCURRENCES_DUPLICATES_DELETER,OCCURRENCES_SUBTRACTION
PERFORMANCES_EVALUATION=QUALITY_ANALYSIS,DISCREPANCY_ANALYSIS
SPECIES_SIMULATION=AQUAMAPS_SUITABLE,AQUAMAPS_NATIVE,AQUAMAPS_NATIVE_2050,AQUAMAPS_SUITABLE_2050,AQUAMAPS_NATIVE_NEURALNETWORK,AQUAMAPS_SUITABLE_NEURALNETWORK
TRAINING=HSPEN,AQUAMAPSNN,FEED_FORWARD_ANN
TIME_SERIES=HCAF_INTERPOLATION
VESSELS=SGVM_INTERPOLATION

18
createscript.jardesc Normal file
View File

@ -0,0 +1,18 @@
<?xml version="1.0" encoding="WINDOWS-1252" standalone="no"?>
<jardesc>
<jar path="ExecutorScriptFile/org.gcube.dataanalysis.executor.executorscriptplugin.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/ExecutorScriptFile/createscript.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=ExecutorScriptFile/src&lt;org.gcube.dataanalysis.executor.plugin.dummy"/>
<javaElement handleIdentifier="=ExecutorScriptFile/src&lt;org.gcube.dataanalysis.executor.scripts"/>
<javaElement handleIdentifier="=ExecutorScriptFile/src&lt;org.gcube.dataanalysis.executor.plugin"/>
</selectedElements>
</jardesc>

16
deployDesktop.jardesc Normal file
View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<jardesc>
<jar path="C:/Users/coro/Desktop/EcologicalEngineExecutor-1.4.0-SNAPSHOT.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/deployDesktop.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java"/>
</selectedElements>
</jardesc>

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<jardesc>
<jar path="EcologicalEngineExecutor/PARALLEL_PROCESSING/EcologicalEngineExecutor-1.2.0-SNAPSHOT.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/deployParallelProcessing.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
<sealing sealJar="false">
<packagesToSeal/>
<packagesToUnSeal/>
</sealing>
</manifest>
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java"/>
</selectedElements>
</jardesc>

2
distro/CHANGELOG Normal file
View File

@ -0,0 +1,2 @@
v. 1.0.0 (20-04-2011)
* First release

1
distro/INSTALL Normal file
View File

@ -0,0 +1 @@
Used as a library in the gCube Framework

8
distro/LICENSE Normal file
View File

@ -0,0 +1,8 @@
gCube System - License
------------------------------------------------------------
The gCube/gCore software is licensed as Free Open Source software conveying to
the EUPL (http://ec.europa.eu/idabc/eupl).
The software and documentation is provided by its authors/distributors "as is"
and no expressed or implied warranty is given for its use, quality or fitness
for a particular case.

2
distro/MAINTAINERS Normal file
View File

@ -0,0 +1,2 @@
Gianpaolo Coro (gianpaolo.coro@isti.cnr.it), CNR Pisa,
Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo"

42
distro/README Normal file
View File

@ -0,0 +1,42 @@
The gCube System - Ecological Engine Library
------------------------------------------------------------
This work is partially funded by the European Commission in the
context of the D4Science project (www.d4science.eu), under the
1st call of FP7 IST priority.
Authors
-------
* Gianpaolo Coro (gianpaolo.coro@isti.cnr.it), CNR Pisa,
Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo"
Version and Release Date
------------------------
version 1.0.0 (22-06-2012)
Description
--------------------
Support library for statistical analysis of Time Series data.
Download information
--------------------
Source code is available from SVN:
http://svn.research-infrastructures.eu/d4science/gcube/trunk/content-management/EcologicalModelling
Binaries can be downloaded from:
http://software.d4science.research-infrastructures.eu/
Documentation
-------------
VREManager documentation is available on-line from the Projects Documentation Wiki:
https://gcube.wiki.gcube-system.org/gcube/index.php/Ecological_Modeling
Licensing
---------
This software is licensed under the terms you may find in the file named "LICENSE" in this directory.

7
distro/changelog.xml Normal file
View File

@ -0,0 +1,7 @@
<ReleaseNotes xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="xsd/changelog.xsd">
<Changeset component="org.gcube.data-analysis.ecological-engine-executor.1-0-0"
date="2012-02-23">
<Change>First Release</Change>
</Changeset>
</ReleaseNotes>

42
distro/descriptor.xml Normal file
View File

@ -0,0 +1,42 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>servicearchive</id>
<formats>
<format>tar.gz</format>
</formats>
<baseDirectory>/</baseDirectory>
<fileSets>
<fileSet>
<directory>${distroDirectory}</directory>
<outputDirectory>/</outputDirectory>
<useDefaultExcludes>true</useDefaultExcludes>
<includes>
<include>README</include>
<include>LICENSE</include>
<include>INSTALL</include>
<include>MAINTAINERS</include>
<include>changelog.xml</include>
</includes>
<fileMode>755</fileMode>
<filtered>true</filtered>
</fileSet>
</fileSets>
<files>
<file>
<source>${distroDirectory}/profile.xml</source>
<outputDirectory>/</outputDirectory>
<filtered>true</filtered>
</file>
<file>
<source>target/${build.finalName}.jar</source>
<outputDirectory>/${artifactId}</outputDirectory>
</file>
<file>
<source>${distroDirectory}/svnpath.txt</source>
<outputDirectory>/${artifactId}</outputDirectory>
<filtered>true</filtered>
</file>
</files>
</assembly>

25
distro/profile.xml Normal file
View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ID></ID>
<Type>Library</Type>
<Profile>
<Description>Ecological Engine Executor Library</Description>
<Class>EcologicalEngineExecutor</Class>
<Name>${artifactId}</Name>
<Version>1.0.0</Version>
<Packages>
<Software>
<Name>${artifactId}</Name>
<Version>${version}</Version>
<MavenCoordinates>
<groupId>${groupId}</groupId>
<artifactId>${artifactId}</artifactId>
<version>${version}</version>
</MavenCoordinates>
<Files>
<File>${build.finalName}.jar</File>
</Files>
</Software>
</Packages>
</Profile>
</Resource>

1
distro/svnpath.txt Normal file
View File

@ -0,0 +1 @@
https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine

View File

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ID/>
<Type>Service</Type>
<Profile>
<Description>A Plugin Executing S.O. Scripts</Description>
<Class>ExecutorPlugins</Class>
<Name>ExecutorScript</Name>
<Version>1.0.0</Version>
<Packages>
<Plugin>
<Name>plugin</Name>
<Version>1.0.0</Version>
<TargetService>
<Service>
<Class>VREManagement</Class>
<Name>Executor</Name>
<Version>1.1.0</Version>
</Service>
<Package>main</Package>
<Version>1.0.0</Version>
</TargetService>
<EntryPoint>org.gcube.dataanalysis.executor.plugin.ScriptPluginContext</EntryPoint>
<Files><File>org.gcube.dataanalysis.executor.executorscriptplugin.jar</File></Files>
</Plugin>
</Packages>
</Profile>
</Resource>

28
etc/profile.xml Normal file
View File

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ID/>
<Type>Service</Type>
<Profile>
<Description>A Plugin Executing S.O. Scripts</Description>
<Class>ExecutorPlugins</Class>
<Name>ExecutorScript</Name>
<Version>1.0.0</Version>
<Packages>
<Plugin>
<Name>plugin</Name>
<Version>1.0.0</Version>
<TargetService>
<Service>
<Class>VREManagement</Class>
<Name>Executor</Name>
<Version>1.1.0</Version>
</Service>
<Package>main</Package>
<Version>1.0.0</Version>
</TargetService>
<EntryPoint>org.gcube.dataanalysis.executor.plugin.ScriptPluginContext</EntryPoint>
<Files><File>org.gcube.dataanalysis.executor.executorscriptplugin.jar</File></Files>
</Plugin>
</Packages>
</Profile>
</Resource>

Binary file not shown.

138
pom.xml Normal file
View File

@ -0,0 +1,138 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>maven-parent</artifactId>
<groupId>org.gcube.tools</groupId>
<version>1.0.0</version>
<relativePath />
</parent>
<groupId>org.gcube.dataanalysis</groupId>
<artifactId>EcologicalEngineExecutor</artifactId>
<version>1.6.4-SNAPSHOT</version>
<name>ecological-engine-executor</name>
<description>ecological-engine-executor</description>
<properties>
<distroDirectory>${project.basedir}/distro</distroDirectory>
</properties>
<dependencies>
<dependency>
<groupId>org.gcube.resourcemanagement</groupId>
<artifactId>executor-service</artifactId>
<version>[1.2.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
</dependency>
<dependency>
<groupId>org.gcube.contentmanagement</groupId>
<artifactId>storage-manager-core</artifactId>
<version>[2.0.2-SNAPSHOT,4.0.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.contentmanagement</groupId>
<artifactId>storage-manager-wrapper</artifactId>
<version>[2.0.2-SNAPSHOT,4.0.0)</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>activemq-core</artifactId>
<version>5.6.0</version>
</dependency>
<dependency>
<groupId>org.gcube.core</groupId>
<artifactId>gcf</artifactId>
<version>[1.4.1,2.0.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.dataanalysis</groupId>
<artifactId>ecological-engine</artifactId>
<version>[1.8.0-SNAPSHOT,2.0.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.informationsystem</groupId>
<artifactId>is-client</artifactId>
<version>[1.5.1,1.6.0]</version>
</dependency>
<dependency>
<groupId>org.gcube.informationsystem</groupId>
<artifactId>is-collector-stubs</artifactId>
<version>[3.0.0-SNAPSHOT, 3.1.0)</version>
</dependency>
<dependency>
<groupId>org.gcube.core</groupId>
<artifactId>common-scope</artifactId>
<version>[1.2.0-SNAPSHOT,3.0.0)</version>
</dependency>
<!-- <dependency>
<groupId>org.apache.jcs</groupId>
<artifactId>jcs</artifactId>
<version>1.3</version>
</dependency>-->
<!-- <dependency> <groupId>org.gcube.dataanalysis</groupId> <artifactId>generic-worker</artifactId>
<version>1.2.0-SNAPSHOT</version> <type>jar</type> <scope>compile</scope>
<exclusions> <exclusion> <artifactId>common-scope</artifactId> <groupId>org.gcube.core</groupId>
</exclusion> </exclusions> </dependency> -->
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.12</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>2.5</version>
<executions>
<execution>
<id>copy-profile</id>
<phase>install</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>target</outputDirectory>
<resources>
<resource>
<directory>${distroDirectory}</directory>
<filtering>true</filtering>
<includes>
<include>profile.xml</include>
</includes>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.2</version>
<configuration>
<descriptors>
<descriptor>${distroDirectory}/descriptor.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<id>servicearchive</id>
<phase>install</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,148 @@
package org.gcube.dataanalysis.executor.generators;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.interfaces.ActorNode;
import org.gcube.dataanalysis.ecoengine.interfaces.Generator;
import org.gcube.dataanalysis.ecoengine.interfaces.GenericAlgorithm;
import org.gcube.dataanalysis.executor.job.management.DistributedProcessingAgent;
public class D4ScienceDistributedProcessing implements Generator {
public static int maxMessagesAllowedPerJob = 20;
public static boolean forceUpload = true;
public static String defaultContainerFolder = "PARALLEL_PROCESSING";
protected AlgorithmConfiguration config;
protected ActorNode distributedModel;
protected String mainclass;
DistributedProcessingAgent agent;
public D4ScienceDistributedProcessing(){
}
public D4ScienceDistributedProcessing(AlgorithmConfiguration config) {
this.config = config;
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
}
public void compute() throws Exception {
try {
agent.compute();
distributedModel.postProcess(agent.hasResentMessages(),false);
} catch (Exception e) {
distributedModel.postProcess(false,true);
AnalysisLogger.getLogger().error("ERROR: An Error occurred ", e);
throw e;
} finally {
shutdown();
}
}
@Override
public List<StatisticalType> getInputParameters() {
List<StatisticalType> distributionModelParams = new ArrayList<StatisticalType>();
distributionModelParams.add(new ServiceType(ServiceParameters.USERNAME,"ServiceUserName","The final user Name"));
return distributionModelParams;
}
@Override
public String getResources() {
return agent.getResources();
}
@Override
public float getStatus() {
return agent.getStatus();
}
@Override
public StatisticalType getOutput() {
return distributedModel.getOutput();
}
@Override
public ALG_PROPS[] getSupportedAlgorithms() {
ALG_PROPS[] p = { ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
return p;
}
@Override
public INFRASTRUCTURE getInfrastructure() {
return INFRASTRUCTURE.D4SCIENCE;
}
@Override
public void init() throws Exception {
Properties p = AlgorithmConfiguration.getProperties(config.getConfigPath() + AlgorithmConfiguration.nodeAlgorithmsFile);
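// nodeAlgorithmsFile maps algorithm names to fully qualified ActorNode class names;
// the selected class is instantiated via reflection below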
String model = config.getModel();
String algorithm = null;
if ((model!=null) && (model.length()>0))
algorithm = model;
else
algorithm=config.getAgent();
mainclass = p.getProperty(algorithm);
distributedModel = (ActorNode) Class.forName(mainclass).newInstance();
distributedModel.setup(config);
String scope = config.getGcubeScope();
AnalysisLogger.getLogger().info("Using the following scope for the computation:"+scope);
String owner = config.getParam("ServiceUserName");
int leftNum = distributedModel.getNumberOfLeftElements();
int rightNum = distributedModel.getNumberOfRightElements();
agent = new DistributedProcessingAgent(config, scope, owner, mainclass, config.getPersistencePath(), algorithm, defaultContainerFolder, maxMessagesAllowedPerJob, forceUpload, leftNum, rightNum);
agent.setLogger(AnalysisLogger.getLogger());
}
@Override
public void setConfiguration(AlgorithmConfiguration config) {
this.config = config;
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
}
@Override
public void shutdown() {
try {
agent.shutdown();
} catch (Exception e) {
}
try {
distributedModel.stop();
} catch (Exception e) {
}
}
@Override
public String getLoad() {
return agent.getLoad();
}
@Override
public String getResourceLoad() {
return agent.getResourceLoad();
}
@Override
public GenericAlgorithm getAlgorithm() {
return distributedModel;
}
@Override
public String getDescription() {
return "A D4Science Cloud Processor for Species Distributions";
}
}

View File

@ -0,0 +1,293 @@
package org.gcube.dataanalysis.executor.job.management;
import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.axis.message.addressing.Address;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.apache.log4j.Logger;
import org.gcube.contentmanagement.graphtools.utils.HttpRequest;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.FileTools;
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
import org.gcube.dataanalysis.ecoengine.connectors.livemonitor.ResourceLoad;
import org.gcube.dataanalysis.ecoengine.connectors.livemonitor.Resources;
import org.gcube.dataanalysis.ecoengine.utils.Operations;
import com.thoughtworks.xstream.XStream;
public class DistributedProcessingAgent {
protected QueueJobManager jobManager;
protected boolean deletefiles = true;
protected String mainclass;
public int maxElementsAllowedPerJob = 20;
protected boolean forceUpload = true;
protected boolean stop;
protected String gscope;
protected String userName;
protected String pathToLib;
protected String modelName;
protected String containerFolder;
protected Serializable configurationFile;
protected int rightSetNumberOfElements;
protected int leftSetNumberOfElements;
protected List<String> endpoints;
protected int subdivisiondiv;
protected static String defaultJobOutput = "execution.output";
protected static String defaultScriptFile = "script";
protected Logger logger;
/**
* A distributed processing agent. Performs a distributed computation by mapping over the product of two sets, A and B.
* Splits over B : A x B1 , A x B2, ... , A x Bn
* Prepares a script to be executed on remote nodes
* The computation is then sent to remote processors.
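* For example (see executeWork below), in the species-distribution algorithms A is the full set of
* species and B is the set of grid cells, so each remote job processes all species against one chunk of cells.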
*/
public DistributedProcessingAgent(Serializable configurationFile,
String gCubeScope,
String computationOwner,
String mainClass,
String pathToLibFolder,
String modelName,
String containerFolder,
int maxElementsPerJob,
boolean forceReUploadofLibs,
int leftSetNumberOfElements,
int rightSetNumberOfElements
) {
this.stop = false;
this.deletefiles = true;
this.gscope=gCubeScope;
this.mainclass=mainClass;
this.maxElementsAllowedPerJob=maxElementsPerJob;
this.forceUpload=forceReUploadofLibs;
this.configurationFile=configurationFile;
this.rightSetNumberOfElements=rightSetNumberOfElements;
this.leftSetNumberOfElements=leftSetNumberOfElements;
this.userName=computationOwner;
this.pathToLib=pathToLibFolder;
this.modelName=modelName;
this.containerFolder=containerFolder;
}
public void setLogger(Logger logger){
this.logger=logger;
}
public void setEndPoints(List<String> endpoints){
this.endpoints=endpoints;
}
public boolean hasResentMessages(){
return jobManager.hasResentMessages();
}
public void compute() throws Exception {
try {
if (logger == null){
logger = AnalysisLogger.getLogger();
}
if (gscope == null)
throw new Exception("Null Scope");
AnalysisLogger.getLogger().debug("SCOPE: "+gscope);
if (endpoints != null) {
List<EndpointReferenceType> eprtList = new ArrayList<EndpointReferenceType>();
for (String ep : endpoints) {
eprtList.add(new EndpointReferenceType(new Address(ep)));
}
jobManager = new QueueJobManager(gscope, endpoints.size(), eprtList);
} else
jobManager = new QueueJobManager(gscope, 1);
int numberOfResources = jobManager.getNumberOfNodes();
// we split along right dimension so if elements are less than nodes, we should reduce the number of nodes
if (numberOfResources > 0) {
// chunkize the number of species in order to lower the computational effort of the workers
subdivisiondiv = rightSetNumberOfElements / (numberOfResources * maxElementsAllowedPerJob);
int rest = rightSetNumberOfElements % (numberOfResources * maxElementsAllowedPerJob);
if (rest > 0)
subdivisiondiv++;
if (subdivisiondiv == 0)
subdivisiondiv = 1;
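// e.g. rightSetNumberOfElements=1000, 5 nodes and maxElementsAllowedPerJob=20
// give 1000/(5*20) = 10 stages; a non-zero remainder adds one more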
executeWork(leftSetNumberOfElements, rightSetNumberOfElements, 0, subdivisiondiv, deletefiles, forceUpload);
if (jobManager.wasAborted()) {
logger.debug("Warning: Job was aborted");
// distributionModel.postProcess(false,true);
throw new Exception("Job System Error");
}
else{
//postprocess
// distributionModel.postProcess(jobManager.hasResentMessages(),false);
}
} else {
logger.debug("Warning: No Workers available");
throw new Exception("No Workers available");
}
} catch (Exception e) {
logger.error("ERROR: An Error occurred ", e);
e.printStackTrace();
throw e;
} finally {
shutdown();
}
}
private void executeWork(int leftNum, int rightNum, int offset, int numberOfResources, boolean deletefiles, boolean forceUpload) throws Exception {
String owner = userName;
int[] chunkSizes = Operations.takeChunks(rightNum, numberOfResources);
List<String> arguments = new ArrayList<String>();
// chunkize with respect to the cells: take a chunk of cells vs all species at each node
for (int i = 0; i < chunkSizes.length; i++) {
String argumentString = "0 " + leftNum + " " + offset + " " + chunkSizes[i] + " ./ "+mainclass;
arguments.add(argumentString);
offset += chunkSizes[i];
logger.debug("Generator-> Argument " + i + ": " + argumentString);
}
if (owner == null)
throw new Exception("Null Owner");
String pathToDir = new File (pathToLib, containerFolder).getAbsolutePath();
if (!(new File(pathToDir).exists()))
throw new Exception("No Implementation of node-model found for algorithm " + pathToDir);
if (mainclass == null)
throw new Exception("No mainClass found for algorithm " + pathToDir);
buildScriptFile(modelName, defaultJobOutput, pathToDir, mainclass);
jobManager.uploadAndExecuteChunkized(AlgorithmConfiguration.StatisticalManagerClass, AlgorithmConfiguration.StatisticalManagerService, owner, pathToDir, "/" + modelName + "/", "./", getScriptName(mainclass), arguments, new XStream().toXML(configurationFile), deletefiles, forceUpload);
}
private String getScriptName(String fullMainClass){
String scriptName = defaultScriptFile+"_"+fullMainClass.substring(fullMainClass.lastIndexOf(".")+1)+".sh";
return scriptName;
}
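// Illustrative name: assuming defaultScriptFile is "script" and the main class is
// org.example.MyNodeAlgorithm, the generated name would be "script_MyNodeAlgorithm.sh".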
// builds a job.sh
public void buildScriptFile(String jobName, String jobOutput, String jarsPath, String fullMainClass) throws Exception {
File expectedscript = new File(jarsPath,getScriptName(fullMainClass));
if (!expectedscript.exists()) {
StringBuffer sb = new StringBuffer();
sb.append("#!/bin/sh\n");
sb.append("# " + jobName + "\n");
sb.append("cd $1\n");
sb.append("\n");
sb.append("java -Xmx1024M -classpath ./:");
File jarsPathF = new File(jarsPath);
File[] files = jarsPathF.listFiles();
for (File jar : files) {
if (jar.getName().endsWith(".jar")) {
sb.append("./" + jar.getName());
sb.append(":");
}
}
sb.deleteCharAt(sb.length() - 1);
sb.append(" " + fullMainClass + " $2 " + jobOutput);
sb.append("\n");
AnalysisLogger.getLogger().trace("D4ScienceGenerator->Generating script in " + expectedscript.getAbsolutePath());
FileTools.saveString(expectedscript.getAbsolutePath(), sb.toString(), true, "UTF-8");
} else
AnalysisLogger.getLogger().trace("D4ScienceGenerator->Script " + expectedscript.getAbsolutePath() + " already exists");
}
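/*
 * Sketch of the script produced by buildScriptFile (jar names and main class are illustrative):
 *
 * #!/bin/sh
 * # MY_MODEL
 * cd $1
 *
 * java -Xmx1024M -classpath ./:./lib-a.jar:./lib-b.jar org.example.MyNodeAlgorithm $2 <defaultJobOutput>
 */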
public String getResources() {
Resources res = new Resources();
try {
int activeNodes = jobManager.getActiveNodes();
for (int i = 0; i < activeNodes; i++) {
try {
res.addResource("Worker_" + (i + 1), 100);
} catch (Exception e1) {
}
}
} catch (Exception e) {
AnalysisLogger.getLogger().debug("D4ScienceGenerator->active nodes not ready");
}
if ((res != null) && (res.list != null))
return HttpRequest.toJSon(res.list).replace("resId", "resID");
else
return "";
}
public float getStatus() {
try {
if (stop)
return 100f;
else
if (jobManager!=null)
return Math.max(0.5f, jobManager.getStatus() * 100f);
else
return 0;
} catch (Exception e) {
return 0f;
}
}
public ALG_PROPS[] getSupportedAlgorithms() {
ALG_PROPS[] p = { ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
return p;
}
public INFRASTRUCTURE getInfrastructure() {
return INFRASTRUCTURE.D4SCIENCE;
}
public void shutdown() {
try {
jobManager.stop();
} catch (Exception e) {
}
stop = true;
}
public String getLoad() {
long tk = System.currentTimeMillis();
ResourceLoad rs = null;
if (jobManager!=null)
rs = new ResourceLoad(tk, jobManager.currentNumberOfStages*subdivisiondiv);
else
rs = new ResourceLoad(tk, 0);
return rs.toString();
}
private long lastTime;
private int lastProcessed;
public String getResourceLoad() {
long thisTime = System.currentTimeMillis();
int processedRecords = 0;
if ((jobManager!=null) && (subdivisiondiv>0))
processedRecords = jobManager.currentNumberOfStages*subdivisiondiv;
int estimatedProcessedRecords = 0;
if (processedRecords == lastProcessed) {
estimatedProcessedRecords = Math.round(((float) thisTime * (float) lastProcessed) / (float) lastTime);
} else {
lastProcessed = processedRecords;
estimatedProcessedRecords = lastProcessed;
}
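// Illustrative extrapolation (timestamps not from the original code): if the previous call saw
// lastProcessed = 100 at lastTime = 60000 ms and nothing new has completed by thisTime = 90000 ms,
// the estimate is Math.round(90000f * 100f / 60000f) = 150 records.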
lastTime = thisTime;
ResourceLoad rs = new ResourceLoad(thisTime, estimatedProcessedRecords);
return rs.toString();
}
}

View File

@ -0,0 +1,821 @@
package org.gcube.dataanalysis.executor.job.management;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;
import javax.jms.ExceptionListener;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageListener;
import org.apache.activemq.ActiveMQConnection;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.gcube.common.core.contexts.GHNContext;
import org.gcube.common.core.informationsystem.client.AtomicCondition;
import org.gcube.common.core.informationsystem.client.ISClient;
import org.gcube.common.core.informationsystem.client.RPDocument;
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.blobstorage.resource.StorageObject;
import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.MemoryType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.ecoengine.utils.Operations;
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
import org.gcube.dataanalysis.executor.messagequeue.Consumer;
import org.gcube.dataanalysis.executor.messagequeue.Producer;
import org.gcube.dataanalysis.executor.messagequeue.QCONSTANTS;
import org.gcube.dataanalysis.executor.messagequeue.QueueManager;
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
import org.gcube.vremanagement.executor.stubs.TaskCall;
import org.gcube.vremanagement.executor.stubs.TaskProxy;
public class QueueJobManager {
// broadcast message period
public static int broadcastTimePeriod = 120000;
// max silence before computation stops
public static int maxSilenceTimeBeforeComputationStop = 10800000;
// max number of retries per computation step
public static int maxNumberOfComputationRetries = 1;
// period for controlling a node activity
public static int computationWatcherTimerPeriod = 120000;
// max number of message to put in a queue
// protected static int maxNumberOfMessages = 20;
public static int maxNumberOfStages = Integer.MAX_VALUE;//10;
// timeout for resending a message
public static int queueWatcherMaxwaitingTime = QCONSTANTS.refreshStatusTime;// * 5;
protected int maxFailureTries;
private static String pluginName = "generic-worker";//"GenericWorker";
protected String scope;
protected GCUBEScope gscope;
protected String session;
protected boolean yetstopped;
protected boolean messagesresent;
protected float status;
protected boolean abort;
protected boolean shutdown;
protected List<EndpointReferenceType> eprs;
protected int activeNodes;
protected int computingNodes;
protected int numberOfMessages;
protected int totalNumberOfMessages;
protected int actualNumberOfNodes;
protected int totalNumberOfStages;
public int currentNumberOfStages;
// files management
protected List<String> filenames;
protected List<String> fileurls;
// queue parameters
protected String queueName;
protected String queueResponse;
protected String queueURL;
protected String queueUSER;
protected String queuePWD;
protected org.gcube.dataanalysis.executor.messagequeue.Consumer consumer;
protected Producer producer;
Timer broadcastTimer;
Timer computationWatcherTimer;
ComputationTimerWatcher computationWatcher;
String serviceClass;
String serviceName;
String owner;
String localDir;
String remoteDir;
String outputDir;
String script;
List<String> arguments;
String configuration;
boolean deletefiles;
StatusListener statuslistener;
private void resetAllVars() {
scope = null;
gscope = null;
yetstopped = false;
messagesresent = false;
status = 0;
abort = false;
shutdown = false;
eprs = null;
activeNodes = 0;
computingNodes = 0;
numberOfMessages = 0;
actualNumberOfNodes = 0;
filenames = null;
fileurls = null;
queueName = null;
queueResponse = null;
queueURL = null;
queueUSER = null;
queuePWD = null;
consumer = null;
producer = null;
broadcastTimer = null;
computationWatcherTimer = null;
computationWatcher = null;
serviceClass = null;
serviceName = null;
owner = null;
localDir = null;
remoteDir = null;
outputDir = null;
script = null;
arguments = null;
configuration = null;
deletefiles = false;
statuslistener = null;
}
public int getActiveNodes() {
return computingNodes;
}
public float getStatus() {
float innerStatus = 0;
if (totalNumberOfMessages != 0)
innerStatus = (1f - ((float) numberOfMessages / (float) totalNumberOfMessages));
if (totalNumberOfStages == 0)
return innerStatus;
else {
float offset = ((float) Math.max(currentNumberOfStages - 1, 0)) / (float) totalNumberOfStages;
float status = offset + (innerStatus / (float) totalNumberOfStages);
// AnalysisLogger.getLogger().info("stages: "+totalNumberOfStages+" inner status: "+innerStatus+" currentStage: "+currentNumberOfStages+" status: "+status);
return status;
}
}
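// Worked example (illustrative values): with totalNumberOfStages = 4, currentNumberOfStages = 2,
// totalNumberOfMessages = 10 and numberOfMessages = 5 still pending, innerStatus = 0.5,
// offset = 1/4, so the returned status is 0.25 + 0.5/4 = 0.375.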
// there is only one node from the client point of view
public int getNumberOfNodes() {
if (eprs.size() > 0)
return 1;
else
return 0;
}
public void setNumberOfNodes(int newNumberOfNodes) {
// ignore this setting in this case
}
private void init(String scope, int numberOfNodes) throws Exception {
resetAllVars();
// init scope variables
this.scope = scope;
gscope = GCUBEScope.getScope(scope);
// introduce a session
// initialize flags
shutdown = false;
yetstopped = false;
messagesresent = false;
abort = false;
// find all the nodes - initialize the eprs
findNodes(scope);
}
public QueueJobManager(String scope, int numberOfNodes) throws Exception {
init(scope, numberOfNodes);
}
public QueueJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
init(scope, numberOfNodes);
this.eprs = eprs;
}
private void setGlobalVars(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles) {
this.serviceClass = serviceClass;
this.serviceName = serviceName;
this.owner = owner;
this.localDir = localDir;
this.remoteDir = remoteDir;
this.outputDir = outputDir;
this.script = script;
this.arguments = arguments;
this.configuration = configuration;
this.deletefiles = deletefiles;
}
private int totalmessages = 0;
public boolean uploadAndExecuteChunkized(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles, boolean forceUpload) throws Exception {
long t0 = System.currentTimeMillis();
int elements = arguments.size();
/*
* int div = elements / (maxNumberOfMessages); int rest = elements % (maxNumberOfMessages); if (rest > 0) div++; if (div == 0) { div = 1; }
*/
session = (("" + UUID.randomUUID()).replace("-", "") + Math.random()).replace(".", "");
int[] chunkSizes = null;
// if the number of elements exceeds the maximum number of stages, split the work into stages; otherwise use a single stage
if (elements>maxNumberOfStages)
chunkSizes = Operations.takeChunks(elements, maxNumberOfStages);
else {
chunkSizes = new int[1];
chunkSizes[0]=elements;
}
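// Note: with the current maxNumberOfStages (Integer.MAX_VALUE) the single-stage branch is always
// taken. Illustrative example if it were lowered to 10: 25 arguments would be split by
// Operations.takeChunks(25, 10) into 10 chunks of 2 or 3 elements, processed as sequential stages.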
int allchunks = chunkSizes.length;
totalNumberOfStages = allchunks;
currentNumberOfStages = 0;
int start = 0;
totalmessages = 0;
AnalysisLogger.getLogger().info("Starting the computation in "+allchunks+" stages");
for (int i = 0; i < allchunks; i++) {
numberOfMessages = totalNumberOfMessages = 0;
currentNumberOfStages++;
int end = Math.min(elements, start + chunkSizes[i]);
AnalysisLogger.getLogger().info("Computing the chunk number " + (i + 1) + " of " + allchunks + " between " + start + " and " + (end - 1));
List<String> sublist = new ArrayList<String>();
for (int j = start; j < end; j++)
sublist.add(arguments.get(j));
AnalysisLogger.getLogger().info("size sub:" + sublist.size());
// totalmessages=totalmessages+sublist.size();
uploadAndExecute(serviceClass, serviceName, owner, localDir, remoteDir, outputDir, script, sublist, configuration, deletefiles, forceUpload);
if (abort)
break;
start = end;
AnalysisLogger.getLogger().info("Processed chunk number " + (i + 1));
}
currentNumberOfStages = totalNumberOfStages;
AnalysisLogger.getLogger().info("Finished computation on all chunks and messages " + totalmessages);
AnalysisLogger.getLogger().info("Whole Procedure done in " + (System.currentTimeMillis() - t0) + " ms");
return (!abort);
}
private boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles, boolean forceUpload) throws Exception {
int numberOfRetries = maxNumberOfComputationRetries;
boolean recompute = true;
while ((numberOfRetries > 0) && (recompute)) {
long t0 = System.currentTimeMillis();
// if (numberOfRetries<maxNumberOfComputationRetries)
init(scope, 1);
AnalysisLogger.getLogger().info("Computation Try number " + (maxNumberOfComputationRetries + 1 - numberOfRetries));
AnalysisLogger.getLogger().info("Contacting " + actualNumberOfNodes + " Nodes");
// set globals
setGlobalVars(serviceClass, serviceName, owner, localDir, remoteDir, outputDir, script, arguments, configuration, deletefiles);
// if not yet uploaded , upload required files
uploadFilesOnStorage(forceUpload);
// initializing queue
setQueueVariables();
// broadcast a message to all executors for purging previous queues
// purgeQueues();
createClientProducer();
broadcastListenCommandToExecutorNodes();
maxFailureTries = activeNodes * 1;
broadcastTimer = new Timer();
broadcastTimer.schedule(new Broadcaster(), broadcastTimePeriod, broadcastTimePeriod);
computationWatcherTimer = new Timer();
computationWatcher = new ComputationTimerWatcher(maxSilenceTimeBeforeComputationStop);
computationWatcherTimer.schedule(computationWatcher, computationWatcherTimerPeriod, computationWatcherTimerPeriod);
// send all messages
sendMessages();
createClientConsumer();
// wait for messages
waitForMessages();
AnalysisLogger.getLogger().info("Wait for message finished - checking result");
if (numberOfMessages == 0) {
AnalysisLogger.getLogger().info("All tasks have correctly finished!");
}
/*
* else{ AnalysisLogger.getLogger().info("Timeout - Warning Some Task is missing!"); for (int k=0;k<finishedChunks.length;k++){ if (finishedChunks[k]==0){ AnalysisLogger.getLogger().info("Sending Again message number " + k); Map<String, Object> inputs = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(k), k, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles); producer.sendMessage(inputs, 0); AnalysisLogger.getLogger().info("Sent Message " + k); } } waitForMessages(); if (numberOfMessages>0){ abort = true; } }
*/
// deleteRemoteFolder();
// summary
AnalysisLogger.getLogger().info("-SUMMARY-");
for (int i = 0; i < totalNumberOfMessages; i++) {
if (activeMessages[i])
AnalysisLogger.getLogger().info("Error : the Message Number " + i + " Was Never Processed!");
if (resentMessages[i] > 0) {
messagesresent = true;
AnalysisLogger.getLogger().info("Warning : the Message Number " + i + " Was resent " + resentMessages[i] + " Times");
}
}
AnalysisLogger.getLogger().info("-SUMMARY END-");
stop();
AnalysisLogger.getLogger().info("Stopped");
AnalysisLogger.getLogger().info("Single Step Procedure done in " + (System.currentTimeMillis() - t0) + " ms");
activeNodes = 0;
numberOfRetries--;
if (abort) {
recompute = true;
if (numberOfRetries > 0)
Thread.sleep(10000);
} else
recompute = false;
}
return (!abort);
}
public boolean hasResentMessages() {
return messagesresent;
}
public void waitForMessages() throws Exception {
AnalysisLogger.getLogger().info("Waiting...");
while ((numberOfMessages > 0) && (!abort)) {
Thread.sleep(2000);
// long tcurrent = System.currentTimeMillis();
// if ((tcurrent - waitTime) > maxwaitingTime) {
// break;
// }
}
AnalysisLogger.getLogger().info("...Stop - Abort?" + abort);
}
public boolean wasAborted() {
return abort;
}
public void purgeQueues() throws Exception {
AnalysisLogger.getLogger().info("Purging Queue");
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
for (int j = 0; j < actualNumberOfNodes; j++) {
try {
contactNodes(tasksProxies, j, queueName, queueUSER, queuePWD, queueURL, queueResponse, session, "true");
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Error in purgin queue on node " + j);
}
}
AnalysisLogger.getLogger().info("Queue Purged");
}
public void stop() {
try {
if (!yetstopped) {
if (broadcastTimer != null) {
AnalysisLogger.getLogger().info("Stopping Broadcaster");
broadcastTimer.cancel();
broadcastTimer.purge();
}
if (computationWatcherTimer != null) {
AnalysisLogger.getLogger().info("Stopping Watcher");
computationWatcherTimer.cancel();
computationWatcherTimer.purge();
}
AnalysisLogger.getLogger().info("Purging Status Listener");
if (statuslistener != null)
statuslistener.destroyAllWatchers();
AnalysisLogger.getLogger().info("Stopping Producer and Consumer");
try{
producer.stop();
producer.closeSession();
}catch(Exception e1){}
try{
consumer.stop();
consumer.closeSession();
}catch(Exception e2){}
AnalysisLogger.getLogger().info("Purging Remote Queues");
purgeQueues();
yetstopped = true;
}
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Not completely stopped");
}
}
private void contactNodes(List<WorkerWatcher> tasksProxies, int order, String queueName, String queueUSER, String queuePWD, String queueURL, String queueResponse, String session, String purgeQueue) throws Exception {
// generate the input map according to the arguments
Map<String, Object> inputs = generateWorkerInput(queueName, queueUSER, queuePWD, queueURL, queueResponse, session, purgeQueue);
AnalysisLogger.getLogger().info("Inputs " + inputs);
// take the i-th endpoint of the executor
EndpointReferenceType selectedEPR = eprs.get(order);
AnalysisLogger.getLogger().info("Broadcasting to node " + (order + 1) + " on " + selectedEPR.getAddress());
// run the executor script
ExecutorCall call = new ExecutorCall(pluginName, gscope);
call.setEndpointReference(selectedEPR);
TaskCall task = null;
AnalysisLogger.getLogger().info("EPR:" + selectedEPR);
task = call.launch(inputs);
// AnalysisLogger.getLogger().info("Task EPR:" + task.getEndpointReference());
TaskProxy proxy = task.getProxy();
tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));
// AnalysisLogger.getLogger().info("Contacting node " + (order + 1) + " OK on " + selectedEPR);
}
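// Queries the Information System for the Executor service instances in the given scope that expose
// the generic-worker plugin, and collects their endpoint references.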
private int findNodes(String scopeString) throws Exception {
AnalysisLogger.getLogger().debug("SCOPE:"+scopeString);
GCUBEScope scope = GCUBEScope.getScope(scopeString);
ISClient client = GHNContext.getImplementation(ISClient.class);
WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='" + pluginName + "']", pluginName));
List<RPDocument> listdoc = client.execute(wsquery, scope);
EndpointReferenceType epr = null;
eprs = new ArrayList<EndpointReferenceType>();
int numberOfEP = 0;
for (RPDocument resource : listdoc) {
epr = resource.getEndpoint();
numberOfEP++;
eprs.add(epr);
}
AnalysisLogger.getLogger().info("Found " + numberOfEP + " endpoints");
// get current number of available nodes
actualNumberOfNodes = eprs.size();
return numberOfEP;
}
private void setQueueVariables() throws Exception {
queueName = "D4ScienceJob"; // + session;
queueResponse = queueName + "Response"+session;
//general scope
queueURL = gscope.getServiceMap().getEndpoints(GHNContext.MSGBROKER).iterator().next().getAddress().toString();
//tests on ecosystem
//TODO: delete this!
// queueURL = "tcp://ui.grid.research-infrastructures.eu:6166";
// queueURL = "tcp://message-broker.d4science.research-infrastructures.eu:6166";
AnalysisLogger.getLogger().info("Queue for the scope: " + queueURL);
if (queueURL==null){
if (scope.startsWith("/gcube"))
queueURL = "tcp://ui.grid.research-infrastructures.eu:6166";
else
queueURL = "tcp://message-broker.d4science.research-infrastructures.eu:6166";
}
queueUSER = ActiveMQConnection.DEFAULT_USER;
queuePWD = ActiveMQConnection.DEFAULT_PASSWORD;
}
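// Illustrative result (session id is hypothetical): with session = "abc123" the job queue is
// "D4ScienceJob" and the response queue is "D4ScienceJobResponseabc123"; the broker URL comes from
// the scope service map, falling back to the hard-coded endpoints above when the lookup returns null.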
public void deleteRemoteFolder() throws Exception {
ScopeProvider.instance.set(scope);
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED,MemoryType.VOLATILE).getClient();
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
AnalysisLogger.getLogger().info("Removing Remote Dir " + remoteDir);
client.removeDir().RDir(remoteDir);
AnalysisLogger.getLogger().info("Removed");
}
private void uploadFilesOnStorage(boolean forceupload) throws Exception {
ScopeProvider.instance.set(scope);
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, MemoryType.VOLATILE).getClient();
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
File dir = new File(localDir);
File[] files = dir.listFiles();
AnalysisLogger.getLogger().info("Start uploading");
filenames = new ArrayList<String>();
fileurls = new ArrayList<String>();
boolean uploadFiles = forceupload;
// if we do not force the upload, check whether the folder is already on the remote storage
if (!uploadFiles) {
List<StorageObject> remoteObjects = client.showDir().RDir(remoteDir);
// only upload files if they are not yet uploaded
if (remoteObjects.size() == 0)
uploadFiles = true;
}
if (!uploadFiles)
AnalysisLogger.getLogger().info("Unnecessary to Uploading Files");
AnalysisLogger.getLogger().info("Loading files");
for (File sfile : files) {
if (sfile.getName().startsWith("."))
continue;
String localf = sfile.getAbsolutePath();
String filename = sfile.getName();
String remotef = remoteDir + sfile.getName();
if (uploadFiles) {
client.put(true).LFile(localf).RFile(remotef);
AnalysisLogger.getLogger().info("Uploading File "+localf+" as remote file "+remotef);
}
String url = client.getUrl().RFile(remotef);
// AnalysisLogger.getLogger().info("URL obtained: " + url);
filenames.add(filename);
fileurls.add(url);
}
AnalysisLogger.getLogger().info("Loading finished");
}
private void broadcastListenCommandToExecutorNodes() throws Exception {
AnalysisLogger.getLogger().info("Submitting script to Remote Queue " + queueName);
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
try{
findNodes(scope);
}catch(Exception e){
AnalysisLogger.getLogger().info("Error in Finding nodes - using previous value");
}
activeNodes = actualNumberOfNodes;
// launch the tasks
for (int i = 0; i < actualNumberOfNodes; i++) {
try {
contactNodes(tasksProxies, i, queueName, queueUSER, queuePWD, queueURL, queueResponse, session, "false");
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Error in Contacting nodes");
}
}
}
private void createClientProducer() throws Exception {
AnalysisLogger.getLogger().info("Creating Message Queue and Producer");
// create the Producer
QueueManager qm = new QueueManager();
qm.createAndConnect(queueUSER, queuePWD, queueURL, queueName);
producer = new Producer(qm, queueName);
AnalysisLogger.getLogger().info("Producer OK");
}
private void createClientConsumer() throws Exception {
AnalysisLogger.getLogger().info("Creating Response Message Queue and Consumer");
// create the listener
statuslistener = new StatusListener();
QueueManager qm1 = new QueueManager();
qm1.createAndConnect(queueUSER, queuePWD, queueURL, queueResponse);
consumer = new Consumer(qm1, statuslistener, statuslistener, queueResponse);
AnalysisLogger.getLogger().info("Consumers OK");
}
boolean activeMessages[];
public int resentMessages[];
private void sendMessages() throws Exception {
int i = 0;
numberOfMessages = arguments.size();
totalNumberOfMessages = numberOfMessages;
AnalysisLogger.getLogger().info("Messages To Send " + numberOfMessages);
activeMessages = new boolean[numberOfMessages];
resentMessages = new int[numberOfMessages];
for (String argum : arguments) {
Map<String, Object> inputs = generateInputMessage(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, false);
producer.sendMessage(inputs, 0);
AnalysisLogger.getLogger().info("Send " + i);
activeMessages[i] = true;
i++;
}
AnalysisLogger.getLogger().info("Messages Sent " + numberOfMessages);
}
private Map<String, Object> generateInputMessage(Object filenames, Object fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir, String session, String configuration, boolean deletefiles, boolean duplicateMessage) {
Map<String, Object> inputs = new HashMap<String, Object>();
inputs.put(ATTRIBUTE.FILE_NAMES.name(), filenames);
inputs.put(ATTRIBUTE.FILE_URLS.name(), fileurls);
inputs.put(ATTRIBUTE.OUTPUTDIR.name(), outputDir);
inputs.put(ATTRIBUTE.SCRIPT.name(), script);
inputs.put(ATTRIBUTE.ARGUMENTS.name(), argum + " " + duplicateMessage);
inputs.put(ATTRIBUTE.ORDER.name(), "" + i);
inputs.put(ATTRIBUTE.SCOPE.name(), scope);
inputs.put(ATTRIBUTE.SERVICE_CLASS.name(), serviceClass);
inputs.put(ATTRIBUTE.SERVICE_NAME.name(), serviceName);
inputs.put(ATTRIBUTE.OWNER.name(), owner);
inputs.put(ATTRIBUTE.REMOTEDIR.name(), remoteDir);
inputs.put(ATTRIBUTE.CLEAN_CACHE.name(), "" + deletefiles);
inputs.put(ATTRIBUTE.QSESSION.name(), session);
inputs.put(ATTRIBUTE.CONFIGURATION.name(), configuration);
inputs.put(ATTRIBUTE.TOPIC_RESPONSE_NAME.name(), queueResponse);
inputs.put(ATTRIBUTE.QUEUE_USER.name(), queueUSER);
inputs.put(ATTRIBUTE.QUEUE_PASSWORD.name(), queuePWD);
inputs.put(ATTRIBUTE.QUEUE_URL.name(), queueURL);
return inputs;
}
private Map<String, Object> generateWorkerInput(String queueName, String queueUser, String queuePassword, String queueURL, String queueResponse, String session, String purge) {
Map<String, Object> inputs = new HashMap<String, Object>();
inputs.put(ATTRIBUTE.TOPIC_NAME.name(), ScriptIOWorker.toInputString(queueName));
inputs.put(ATTRIBUTE.QUEUE_USER.name(), ScriptIOWorker.toInputString(queueUser));
inputs.put(ATTRIBUTE.QUEUE_PASSWORD.name(), ScriptIOWorker.toInputString(queuePassword));
inputs.put(ATTRIBUTE.QUEUE_URL.name(), ScriptIOWorker.toInputString(queueURL));
inputs.put(ATTRIBUTE.TOPIC_RESPONSE_NAME.name(), ScriptIOWorker.toInputString(queueResponse));
inputs.put(ATTRIBUTE.QSESSION.name(), session);
inputs.put(ATTRIBUTE.ERASE.name(), purge);
return inputs;
}
public class Broadcaster extends TimerTask {
@Override
public void run() {
try {
AnalysisLogger.getLogger().info("(((((((((((((((((((((((((((------Broadcasting Information To Watchers------)))))))))))))))))))))))))))");
broadcastListenCommandToExecutorNodes();
AnalysisLogger.getLogger().info("(((((((((((((((((((((((((((------END Broadcasting Information To Watchers------)))))))))))))))))))))))))))");
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("--------------------------------Broadcaster: Error Sending Listen Message to Executors------)))))))))))))))))))))))))))");
}
}
}
public class ComputationTimerWatcher extends TimerTask {
long maxTime;
long lastTimeClock;
public ComputationTimerWatcher(long maxtime) {
this.maxTime = maxtime;
this.lastTimeClock = System.currentTimeMillis();
}
public void reset() {
lastTimeClock = System.currentTimeMillis();
}
public void setmaxTime(long maxTime) {
this.maxTime = maxTime;
}
@Override
public void run() {
try {
long t0 = System.currentTimeMillis();
AnalysisLogger.getLogger().info("Computation Watcher Timing Is " + (t0 - lastTimeClock)+" max computation time is "+maxTime);
if ((t0 - lastTimeClock) > maxTime) {
AnalysisLogger.getLogger().info("Computation Watcher - Computation Timeout: Closing Queue Job Manager!!!");
abort();
}
} catch (Exception e) {
e.printStackTrace();
AnalysisLogger.getLogger().info("Error Taking clock");
}
}
}
public synchronized void abort() {
AnalysisLogger.getLogger().info("Computation Aborted");
this.abort = true;
}
public class StatusListener implements MessageListener, ExceptionListener {
private QueueWorkerWatcher[] watchers;
synchronized public void onException(JMSException ex) {
abort();
AnalysisLogger.getLogger().info("JMS Exception occured. Shutting down client.");
}
private synchronized void addWatcher(int order) {
if (watchers == null)
watchers = new QueueWorkerWatcher[totalNumberOfMessages];
QueueWorkerWatcher watcher = watchers[order];
if (watcher != null) {
destroyWatcher(order);
}
Map<String, Object> message = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(order), order, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, true);
watchers[order] = new QueueWorkerWatcher(producer, message, order);
}
private synchronized void resetWatcher(int order) {
if (watchers == null)
watchers = new QueueWorkerWatcher[totalNumberOfMessages];
else if (watchers[order] != null)
watchers[order].resetTime();
}
private synchronized void destroyWatcher(int order) {
if (watchers != null && watchers[order] != null) {
if (watchers[order].hasResent())
resentMessages[order] = resentMessages[order] + 1;
watchers[order].destroy();
watchers[order] = null;
AnalysisLogger.getLogger().info("Destroyed Watcher number " + order);
}
}
public synchronized void destroyAllWatchers() {
if (watchers != null) {
for (int i = 0; i < watchers.length; i++) {
destroyWatcher(i);
}
}
}
public void onMessage(Message message) {
// get message
try {
HashMap<String, Object> details = (HashMap<String, Object>) (HashMap<String, Object>) message.getObjectProperty(ATTRIBUTE.CONTENT.name());
String status = (String) details.get(ATTRIBUTE.STATUS.name());
String order = "" + details.get(ATTRIBUTE.ORDER.name());
String nodeaddress = (String) details.get(ATTRIBUTE.NODE.name());
String msession = (String) details.get(ATTRIBUTE.QSESSION.name());
Object error = details.get(ATTRIBUTE.ERROR.name());
AnalysisLogger.getLogger().info("Current session " + session);
if ((msession != null) && (msession.equals(session))) {
AnalysisLogger.getLogger().info("Session " + session + " is right - acknowledge");
message.acknowledge();
AnalysisLogger.getLogger().info("Session " + session + " acknowledged");
int orderInt = -1;
try {
orderInt = Integer.parseInt(order);
} catch (Exception e3) {
e3.printStackTrace();
}
if (orderInt > -1) {
// reset the watcher
if (computationWatcher!=null)
computationWatcher.reset();
AnalysisLogger.getLogger().info("Task number " + order + " is " + status + " on node " + nodeaddress + " and session " + session);
if (status.equals(ATTRIBUTE.STARTED.name())) {
computingNodes++;
addWatcher(orderInt);
}
if (status.equals(ATTRIBUTE.PROCESSING.name())) {
resetWatcher(orderInt);
} else if (status.equals(ATTRIBUTE.FINISHED.name())) {
totalmessages++;
computingNodes--;
destroyWatcher(orderInt);
if (numberOfMessages > 0)
numberOfMessages--;
AnalysisLogger.getLogger().info("Remaining " + numberOfMessages + " messages to manage");
activeMessages[orderInt] = false;
} else if (status.equals(ATTRIBUTE.FATAL_ERROR.name())) {
if (error != null)
AnalysisLogger.getLogger().info("REPORTED FATAL_ERROR on " + nodeaddress + " : " + error);
computingNodes--;
if (maxFailureTries <= 0) {
AnalysisLogger.getLogger().info("Too much Failures - Aborting");
destroyAllWatchers();
abort();
} else {
AnalysisLogger.getLogger().info("Failure Occurred - Now Resending Message " + orderInt);
resentMessages[orderInt] = resentMessages[orderInt] + 1;
maxFailureTries--;
// resend message
Map<String, Object> retrymessage = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(orderInt), orderInt, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, true);
producer.sendMessage(retrymessage, QCONSTANTS.timeToLive);
AnalysisLogger.getLogger().info("Failure Occurred - Resent Message " + orderInt);
}
}
} else
AnalysisLogger.getLogger().info("Ignoring message " + order + " with status " + status);
} else {
AnalysisLogger.getLogger().info("wrong session " + msession + " ignoring message");
// consumer.manager.session.recover();
}
} catch (Exception e) {
AnalysisLogger.getLogger().info("Error reading details ", e);
AnalysisLogger.getLogger().info("...Aborting Job...");
abort();
}
}
}
}

View File

@ -0,0 +1,76 @@
package org.gcube.dataanalysis.executor.job.management;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import javax.jms.Message;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
import org.gcube.dataanalysis.executor.messagequeue.Producer;
import org.gcube.dataanalysis.executor.messagequeue.QCONSTANTS;
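// Watches a single in-flight task: if no status update resets the timer within maxwaitingTime,
// the original message is re-sent once to the queue and the watcher destroys itself.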
public class QueueWorkerWatcher {
protected int maxwaitingTime = 2*QueueJobManager.queueWatcherMaxwaitingTime;
private long lastTimeClock;
Timer watcher;
Producer producer;
Map<String, Object> message;
public boolean resent=false;
int order;
public QueueWorkerWatcher(Producer producer, Map<String, Object> message, int order) {
this.producer = producer;
this.message = message;
resent=false;
this.order = order;
watcher = new Timer();
watcher.schedule(new Controller(), 0, QCONSTANTS.refreshStatusTime);
resetTime();
}
public synchronized void resetTime() {
lastTimeClock = System.currentTimeMillis();
}
public synchronized void destroy() {
if (watcher != null) {
watcher.cancel();
watcher.purge();
watcher = null;
}
}
public boolean hasResent(){
return resent;
}
private class Controller extends TimerTask {
@Override
public void run() {
try {
long t0 = System.currentTimeMillis();
AnalysisLogger.getLogger().debug("Watcher "+order+" Timing Is "+(t0 - lastTimeClock)+ " max waiting time: "+maxwaitingTime);
if ((t0 - lastTimeClock) > maxwaitingTime) {
AnalysisLogger.getLogger().info("Watcher "+order+" Time Is Over "+(t0 - lastTimeClock));
AnalysisLogger.getLogger().info("Watcher "+order+" Re-Sending Message "+message);
producer.sendMessage(message, QCONSTANTS.timeToLive);
// QueueJobManager.resentMessages[Integer.parseInt(""+message.get(ATTRIBUTE.ORDER.name()))]=QueueJobManager.resentMessages[Integer.parseInt(""+message.get(ATTRIBUTE.ORDER.name()))]+1;
resent = true;
AnalysisLogger.getLogger().info("Watcher "+order+" Destroying watcher");
destroy();
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
}

View File

@ -0,0 +1,248 @@
package org.gcube.dataanalysis.executor.job.management;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.gcube.common.core.contexts.GHNContext;
import org.gcube.common.core.informationsystem.client.AtomicCondition;
import org.gcube.common.core.informationsystem.client.ISClient;
import org.gcube.common.core.informationsystem.client.RPDocument;
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
import org.gcube.vremanagement.executor.stubs.TaskCall;
import org.gcube.vremanagement.executor.stubs.TaskProxy;
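// Direct job manager: uploads the working directory to the storage once, launches the script on each
// Executor node through the ExecutorScript plugin and polls the task states until all nodes finish
// or one of them fails.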
public class RemoteJobManager {
private static String pluginName = "ExecutorScript";
private int actualNumberOfNodes;
private GCUBEScope gscope;
private List<EndpointReferenceType> eprs;
float status;
boolean abort;
boolean shutdown;
protected int activeNodes;
String scope;
public int getActiveNodes() {
return activeNodes;
}
public float getStatus() {
return status;
}
public int getNumberOfNodes() {
return actualNumberOfNodes;
}
public void setNumberOfNodes(int newNumberOfNodes) {
actualNumberOfNodes = newNumberOfNodes;
}
public void init(String scope, int numberOfNodes) throws Exception {
this.scope = scope;
gscope = GCUBEScope.getScope(scope);
AnalysisLogger.getLogger().debug("Using the following scope for this computation:"+gscope);
shutdown = false;
yetuploaded = false;
if (eprs == null)
actualNumberOfNodes = findNodes(scope);
else
actualNumberOfNodes = eprs.size();
if (numberOfNodes < actualNumberOfNodes)
actualNumberOfNodes = numberOfNodes;
}
public RemoteJobManager(String scope, int numberOfNodes) throws Exception {
init(scope, numberOfNodes);
}
public RemoteJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
this.eprs = eprs;
init(scope, numberOfNodes);
}
List<String> filenames;
List<String> fileurls;
boolean yetuploaded;
String session;
public boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, boolean deletefiles) throws Exception {
boolean executeAll = false;
long t0 = System.currentTimeMillis();
//if not yet uploaded , upload required files
if (!yetuploaded) {
ScopeProvider.instance.set(scope);
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED).getClient();
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
File dir = new File(localDir);
File[] files = dir.listFiles();
AnalysisLogger.getLogger().debug("Start uploading");
filenames = new ArrayList<String>();
fileurls = new ArrayList<String>();
for (File sfile : files) {
String localf = sfile.getAbsolutePath();
String filename = sfile.getName();
String remotef = remoteDir + sfile.getName();
client.put(true).LFile(localf).RFile(remotef);
String url = client.getUrl().RFile(remotef);
AnalysisLogger.getLogger().debug("URL created: " + url);
filenames.add(filename);
fileurls.add(url);
}
AnalysisLogger.getLogger().debug("Upload end");
yetuploaded = true;
session = (""+UUID.randomUUID()).replace("-", "");
}
//if the number of available nodes is higher than zero launch the tasks
if (actualNumberOfNodes > 0) {
AnalysisLogger.getLogger().debug("Executing script on " + actualNumberOfNodes + " nodes");
int len = arguments.size();
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
activeNodes = 0;
//launch the tasks
for (int i = 0; i < actualNumberOfNodes; i++) {
String argum = "";
//supply the arguments if they are available
if (i < len)
argum = arguments.get(i);
//generate the input map according to the arguments
Map<String, Object> inputs = generateInput(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir,session,deletefiles);
AnalysisLogger.getLogger().debug("-> Owner: " + owner + " ServiceClass: " + serviceClass + " ServiceName:" + serviceName + " remoteDir:" + remoteDir);
//take the i-th endpoint of the executor
EndpointReferenceType selectedEPR = eprs.get(i);
AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " on " + selectedEPR);
//run the executor script
ExecutorCall call = new ExecutorCall(pluginName, gscope);
call.setEndpointReference(selectedEPR);
TaskCall task = null;
task = call.launch(inputs);
TaskProxy proxy = task.getProxy();
tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));
AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " OK on " + selectedEPR);
//add the task to the list in order to reuse it
}
activeNodes = actualNumberOfNodes;
AnalysisLogger.getLogger().debug("Launch Finished - Controlling Status");
int allstatus = 0;
abort = false;
//control the execution: go until there are active nodes or the process must stop
while ((activeNodes != 0) && (!abort) && (!shutdown)) {
//for each node get the task state
int nworkers = tasksProxies.size();
int i=0;
while (i < nworkers) {
WorkerWatcher proxy = tasksProxies.get(i);
String state = proxy.getState();
AnalysisLogger.getLogger().debug("REMOTE JOB MANAGER-> STATE " + state );
//control for aborted computation
abort = ((state == null) || state.equals("FAILED") || (!state.equals("DONE") && !state.equals("RUNNING")));
//control for finished computation
boolean finished = false;
if (state != null)
finished = state.equals("DONE");
//if finished update the active nodes
if (finished) {
tasksProxies.remove(i);
allstatus++;
activeNodes--;
nworkers--;
if (activeNodes == 0)
break;
}
else
i++;
status = Math.min(((float) allstatus / (float) actualNumberOfNodes) * 100f, 95f);
if (abort)
break;
if (shutdown)
break;
// AnalysisLogger.getLogger().debug(String.format("Task " + i + "executed started at %Tc with %s state ", proxy.getStartTime(), state));
//sleep before polling again
Thread.sleep(2000);
}
}
activeNodes = 0;
AnalysisLogger.getLogger().debug("All Tasks have Finished");
if (!abort) {
AnalysisLogger.getLogger().debug("All Task were successful");
/*
* List<StorageObject> listElements = client.showDir().RDir(remoteDir); for (StorageObject obj : listElements) { AnalysisLogger.getLogger().debug("obj stored in directory " + remoteDir + ": " + obj.getName()); }
*/
} else
AnalysisLogger.getLogger().debug("Tasks were NOT successful");
} else
AnalysisLogger.getLogger().debug("Warning: could not execute tasks: No Nodes Available!");
AnalysisLogger.getLogger().debug("Whole procedure done in " + (System.currentTimeMillis() - t0) + " ms");
status = 100f;
return executeAll;
}
public boolean wasAborted() {
return abort;
}
public void stop() {
shutdown = true;
}
private int findNodes(String scopeString) throws Exception {
GCUBEScope scope = GCUBEScope.getScope(scopeString);
ISClient client = GHNContext.getImplementation(ISClient.class);
WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='"+pluginName+"']", pluginName));
List<RPDocument> listdoc = client.execute(wsquery, scope);
EndpointReferenceType epr = null;
eprs = new ArrayList<EndpointReferenceType>();
int numberOfEP = 0;
for (RPDocument resource : listdoc) {
epr = resource.getEndpoint();
numberOfEP++;
eprs.add(epr);
}
AnalysisLogger.getLogger().debug("Found " + numberOfEP + " endpoints");
return numberOfEP;
}
private Map<String, Object> generateInput(Object filenames, Object fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir,String session,boolean deletefiles) {
Map<String, Object> inputs = new HashMap<String, Object>();
inputs.put("FILE_NAMES", filenames);
inputs.put("FILE_URLS", fileurls);
inputs.put("OUTPUTDIR", ScriptIOWorker.toInputString(outputDir));
inputs.put("SCRIPT", ScriptIOWorker.toInputString(script));
inputs.put("ARGUMENTS", ScriptIOWorker.toInputString(argum));
inputs.put("NODE_IDENTIFIER", "" + i);
inputs.put("SCOPE", ScriptIOWorker.toInputString(scope));
inputs.put("SERVICE_CLASS", ScriptIOWorker.toInputString(serviceClass));
inputs.put("SERVICE_NAME", ScriptIOWorker.toInputString(serviceName));
inputs.put("OWNER", ScriptIOWorker.toInputString(owner));
inputs.put("REMOTEDIR", ScriptIOWorker.toInputString(remoteDir));
inputs.put("CLEAN_CACHE",""+deletefiles);
// inputs.put("SESSION", ScriptIO.toInputString(session));
return inputs;
}
}

View File

@ -0,0 +1,37 @@
package org.gcube.dataanalysis.executor.job.management;
import org.apache.log4j.Logger;
import org.gcube.vremanagement.executor.stubs.TaskProxy;
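// Polls the state of a remote Executor task through its TaskProxy; up to maxTries transient errors
// are tolerated before the task is reported as FAILED.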
public class WorkerWatcher {
private static int maxTries = 15;
private int currentTries;
private static String runningState = "RUNNING";
private static String failedState = "FAILED";
Logger logger;
TaskProxy proxy;
public WorkerWatcher(TaskProxy proxy, Logger logger){
this.proxy = proxy;
this.logger = logger;
currentTries = 1;
}
public String getState(){
String state ="";
try{
proxy.synchronize();
state = proxy.getState();
return state;
}catch(Exception e){
logger.error("Error in getting state: recover try number "+currentTries,e);
currentTries++;
if (currentTries>maxTries){
return failedState;
}
else return runningState;
}
}
}

View File

@ -0,0 +1,34 @@
package org.gcube.dataanalysis.executor.messagequeue;
public enum ATTRIBUTE {
STATUS,
DONE,
STARTED,
FINISHED,
PROCESSING,
FATAL_ERROR,
TRIVIAL_ERROR,
ORDER,
NODE,
CONTENT,
QSESSION,
TOPIC_NAME,
QUEUE_USER,
QUEUE_PASSWORD,
QUEUE_URL,
TOPIC_RESPONSE_NAME,
ERASE,
FILE_NAMES,
FILE_URLS,
CONFIGURATION,
OUTPUTDIR,
OWNER,
REMOTEDIR,
SERVICE_CLASS,
SERVICE_NAME,
SCOPE,
SCRIPT,
ARGUMENTS,
CLEAN_CACHE,
ERROR
}

View File

@ -0,0 +1,62 @@
package org.gcube.dataanalysis.executor.messagequeue;
import javax.jms.ExceptionListener;
import javax.jms.JMSException;
import javax.jms.MessageConsumer;
import javax.jms.MessageListener;
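// Wraps a JMS MessageConsumer: registers the given MessageListener and ExceptionListener on the
// destination managed by the QueueManager.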
public class Consumer {
public QueueManager manager;
public MessageConsumer consumer;
private MessageListener consumerCallback;
private ExceptionListener errorCallback;
private String topic;
public Consumer(QueueManager manager, MessageListener consumerCallback, ExceptionListener errorCallback, String topic) throws JMSException, InterruptedException {
this.manager = manager;
this.consumerCallback = consumerCallback;
this.errorCallback = errorCallback;
this.topic = topic;
create();
}
private void create() throws JMSException, InterruptedException {
// Topic ConsumerTopic = manager.session.createTopic(topic);
MessageConsumer consumer = manager.session.createConsumer(manager.destination);
// MessageConsumer consumer = manager.session.createDurableSubscriber(ConsumerTopic, "Consumer."+topic);
// MessageConsumer consumer = manager.session.createConsumer(ConsumerTopic);
manager.connection.setExceptionListener(errorCallback);
consumer.setMessageListener(consumerCallback);
}
public void standBy() throws JMSException {
if (consumer != null)
consumer.close();
}
public void wake() throws Exception {
this.create();
}
public void stop() throws JMSException {
if (consumer != null) {
consumer.close();
}
// closeSession();
}
public void closeSession() throws JMSException {
try {
manager.closeSession();
manager.connection.close();
} catch (Exception e) {
}
}
}

View File

@ -0,0 +1,70 @@
package org.gcube.dataanalysis.executor.messagequeue;
import java.util.UUID;
import javax.jms.DeliveryMode;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageProducer;
import javax.jms.TextMessage;
import javax.jms.Topic;
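// Wraps a JMS MessageProducer: messages are sent in persistent mode and the payload is attached as
// an object property under ATTRIBUTE.CONTENT (see sendMessage).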
public class Producer {
public MessageProducer producer;
public QueueManager manager;
public String topic;
public String identifier;
public Producer(QueueManager manager,String topic) throws JMSException {
this.manager = manager;
this.topic = topic;
this.identifier = "" + UUID.randomUUID();
create();
}
private void create() throws JMSException {
// Topic ProducerTopic = manager.session.createTopic(topic);
producer = manager.session.createProducer(manager.destination);
// producer = manager.session.createProducer(ProducerTopic);
producer.setDeliveryMode(DeliveryMode.PERSISTENT);
}
public void sendTextMessage(String text, long timeToLive) throws JMSException {
TextMessage message = manager.session.createTextMessage(text);
producer.setTimeToLive(timeToLive);
producer.send(message);
}
public void sendMessage(Object toSend, long timeToLive) throws JMSException {
Message message = manager.session.createMessage();
message.setObjectProperty(ATTRIBUTE.CONTENT.name(), toSend);
producer.setTimeToLive(timeToLive);
producer.send(message);
}
public void standBy() throws JMSException {
producer.close();
}
public void wake() throws Exception {
this.create();
}
public void stop() throws JMSException {
if (producer != null){
producer.close();
}
// closeSession();
}
public void closeSession() throws JMSException {
try {
manager.closeSession();
manager.connection.close();
} catch (Exception e) {
}
}
}

View File

@ -0,0 +1,8 @@
package org.gcube.dataanalysis.executor.messagequeue;
public class QCONSTANTS {
public static int refreshStatusTime = 60000;
public static int QueueLifeTime = 60000;//3600000;
public static long timeToLive = 0;
}

View File

@ -0,0 +1,77 @@
package org.gcube.dataanalysis.executor.messagequeue;
import java.util.Hashtable;
import java.util.Properties;
import java.util.UUID;
import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.Destination;
import javax.jms.JMSException;
import javax.jms.Session;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;
import javax.naming.Context;
import javax.naming.InitialContext;
import org.apache.activemq.ActiveMQConnectionFactory;
import org.apache.activemq.broker.BrokerService;
import org.apache.activemq.broker.jmx.QueueViewMBean;
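// Creates the ActiveMQ connection and a CLIENT_ACKNOWLEDGE session; the queue prefetch is limited
// to 1 so that each worker takes a single message at a time.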
public class QueueManager {
public ActiveMQConnectionFactory connectionFactory;
public Connection connection;
public Session session;
public Destination destination;
boolean transacted = false;
public String mqurl;
private String identifier;
public void createAndConnect(String user,String password, String mqurl, String queueName) throws JMSException {
this.mqurl=mqurl;
connect(user,password,mqurl);
session = connection.createSession(transacted, Session.CLIENT_ACKNOWLEDGE);
this.identifier = ""+UUID.randomUUID();
/*
Hashtable properties = new Hashtable();
properties.put(Context.INITIAL_CONTEXT_FACTORY, "org.apache.activemq.jndi.ActiveMQInitialContextFactory");
properties.put(Context.PROVIDER_URL, mqurl);
InitialContext context = new InitialContext(properties);
ConnectionFactory factory = (ConnectionFactory) context.lookup("ConnectionFactory");
destination = (Destination) context.lookup(queueName);
*/
// destination = session.createQueue(queueName+"?consumer.prefetchSize=3");
destination = session.createQueue(queueName+"?wireFormat.maxInactivityDurationInitalDelay=3600000&requestTimeout=240000&wireFormat.maxInactivityDuration=3600000");
}
public void destroy(){
}
private void connect(String user,String password, String mqurl) throws JMSException{
connectionFactory = new ActiveMQConnectionFactory(user, password, mqurl);
connectionFactory.getPrefetchPolicy().setQueuePrefetch(1);
// Properties p = new Properties();
// p.put("persistent", "false");
// p.put("consumer.prefetchSize", "3");
// p.put("ms.prefetchPolicy.all", "3");
// p.put("cms.PrefetchPolicy.queuePrefetch", "3");
// connectionFactory.setProperties(p);
connection = connectionFactory.createConnection();
connection.setClientID(identifier);
connection.start();
}
public void closeSession() throws Exception{
// session.unsubscribe(identifier);
session.close();
}
}

View File

@ -0,0 +1,19 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
public class AquamapsNative2050Node extends AquamapsNativeNode{
public AquamapsNative2050Node(){
super();
type = "2050";
}
public String getName() {
return "AQUAMAPS_NATIVE_2050";
}
public String getDescription() {
return "Algorithm for Native Range in 2050 by Aquamaps on a single node";
}
}

View File

@ -0,0 +1,70 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
import org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative;
public class AquamapsNativeNode extends AquamapsSuitableNode{
public AquamapsNativeNode(){
super();
}
public String getName() {
return "AQUAMAPS_NATIVE";
}
public String getDescription() {
return "Algorithm for Native Range by Aquamaps on a single node";
}
// writes the distribution model on the DB: input species vector + list of areas vectors to report
public void singleStepPostprocess(Object species) {
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Analyzing Species distribution");
// write info on DB
Queue<String> rows = new ConcurrentLinkedQueue<String>();
String speciesID = AquamapsSuitableFunctions.getMainInfoID(species);
Map<String, Float> csquaresMap = operations.completeDistribution.get(speciesID);
if (csquaresMap != null) {
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Getting csquare probabilites");
// write only processed areas
for (String singleCsquare : csquaresMap.keySet()) {
String additionalInformation = operations.getAdditionalInformation(species, operations.processedAreas.get(singleCsquare));
if (additionalInformation == null)
additionalInformation = "";
else if (additionalInformation.length() > 0)
additionalInformation = "," + additionalInformation.trim();
float prob = 0f;
try {
prob = csquaresMap.get(singleCsquare);
} catch (Exception e) {
System.out.println("Aquamaps Algorithm Single Step PostProcess ->Error in getting probability value at " + speciesID + " , " + singleCsquare);
}
if (prob > 0)
rows.offer("'" + speciesID + "','" + singleCsquare + "','" + MathFunctions.roundDecimal(prob, 3) + "'" + additionalInformation);
}
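// Illustrative row (species id and csquare code are hypothetical):
// "'Fis-22747','1000:102:1','0.875'" followed, when present, by the comma-prefixed additional
// information string.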
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Filtering probabilities. Size:"+rows.size());
Queue<String> newrows = new AquamapsNative().filterProbabilitySet(rows);
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Filtered probabilities. Size:"+newrows.size());
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Writing rows on DB");
List<String> toWrite = new ArrayList<String>();
for (String row:newrows){
toWrite.add(row);
// System.out.println("Added row: "+row);
}
AquamapsSuitableFunctions.writeOnDB(toWrite, currentconfig.getParam("DistributionTable"), dbHibConnection);
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Rows written on DB");
}
else
System.out.println("Aquamaps Algorithm Single Step PostProcess-> Probability distribution is void");
}
}

View File

@ -0,0 +1,19 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
public class AquamapsSuitable2050Node extends AquamapsSuitableNode{
public AquamapsSuitable2050Node(){
super();
type = "2050";
}
public String getName() {
return "AQUAMAPS_SUITABLE_2050";
}
public String getDescription() {
return "Algorithm for Suitable Range in 2050 by Aquamaps on a single node";
}
}

View File

@ -0,0 +1,200 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.ObjectInputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsAlgorithmCore;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
import org.hibernate.SessionFactory;
import com.thoughtworks.xstream.XStream;
public class AquamapsSuitableFunctions {
public static String countAllSpeciesQuery = "select count(*) from %1$s;";
// public static String countAll = "select count(*) from %1$s;";
public static String countAll = "EXPLAIN SELECT * FROM %1$s;";
public static String countCsquareCodeQuery = "select count (*) from %1$s d where oceanarea>0";
public static String selectAllSpeciesQuery = "select depthmin,meandepth,depthprefmin,pelagic,depthprefmax,depthmax,tempmin,layer,tempprefmin,tempprefmax,tempmax,salinitymin,salinityprefmin,salinityprefmax,salinitymax,primprodmin,primprodprefmin,primprodprefmax,primprodmax,iceconmin,iceconprefmin,iceconprefmax,iceconmax,landdistyn,landdistmin,landdistprefmin,landdistprefmax,landdistmax,nmostlat,smostlat,wmostlong,emostlong,faoareas,speciesid from %1$s order by speciesid limit %2$s offset %3$s;";
public static String csquareCodeQuery = "select csquarecode,depthmean,depthmax,depthmin, sstanmean,sbtanmean,salinitymean,salinitybmean, primprodmean,iceconann,landdist,oceanarea,centerlat,centerlong,faoaream,eezall,lme from %1$s d where oceanarea>0 order by csquarecode limit %2$s offset %3$s";
public static String createTableStatement = "CREATE TABLE %1$s ( speciesid character varying, csquarecode character varying, probability real, boundboxyn smallint, faoareayn smallint, faoaream integer, eezall character varying, lme integer) WITH (OIDS=FALSE ) #TABLESPACE#; CREATE INDEX CONCURRENTLY %1$s_idx ON %1$s USING btree (speciesid, csquarecode, faoaream, eezall, lme);";
public static String metainfo = "boundboxyn, faoareayn, faoaream, eezall, lme";
public static String selectAllSpeciesObservationQuery = "SELECT speciesid,maxclat,minclat from %1$s;";
public static String probabilityInsertionStatement = "insert into %1$s (speciesid,csquarecode,probability %ADDEDINFORMATION%) VALUES %2$s";
public static String deleteDuplicates = "delete from %1$s where speciesid='%2$s'";
// Default Files
private static String speciesFile = "species.dat";
private static String csquaresFile = "csquares.dat";
private static String maxminlatFile = "maxminlat.dat";
private static String configFile = "config.dat";
// file1
public HashMap<String, List<Object>> allSpeciesObservations;
// file2
public List<Object> speciesVectors;
// file3
public List<Object> environmentVectors;
public int numberOfSpecies;
public int numberOfCells;
//processing variables
public AlgorithmConfiguration currentconfig;
public HashMap<String, String> currentSpeciesBoundingBoxInfo;
public String currentFAOAreas;
public AquamapsAlgorithmCore core;
public String type;
public HashMap<String, Object> processedAreas;
public ConcurrentHashMap<String, Map<String, Float>> completeDistribution;
public AquamapsSuitableFunctions(AquamapsAlgorithmCore core, String type, AlgorithmConfiguration config) {
this.core = core;
this.type = type;
this.currentconfig = config;
}
//PROBABILITY CALCULATION
// calculates the probability and keeps track of the areas processed by this node
public float calcProb(Object species, Object area) {
float prob = (float) core.getSpeciesProb((Object[]) species, (Object[]) area);
String speciesID = getMainInfoID(species);
String csquareCode = getGeographicalID(area);
if (completeDistribution == null)
completeDistribution = new ConcurrentHashMap<String, Map<String, Float>>();
Map<String, Float> geoDistrib = completeDistribution.get(speciesID);
// if the map is null then generate a new map, otherwise update it
if (geoDistrib == null) {
geoDistrib = new ConcurrentHashMap<String, Float>();
completeDistribution.put(speciesID, geoDistrib);
}
if (prob > 0.1) {
// record the overall probability distribution
geoDistrib.put(csquareCode, prob);
if (processedAreas == null)
processedAreas = new HashMap<String, Object>();
processedAreas.put(csquareCode, area);
}
return prob;
}
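// Note: completeDistribution maps speciesID -> (csquareCode -> probability) for the
// cells that passed the 0.1 threshold above, while processedAreas keeps the full
// environmental row of each retained cell.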
//BOUNDING BOX CALCULATION
// calculates the bounding box information
public HashMap<String, Integer> calculateBoundingBox(Object[] csquarecode) {
HashMap<String, Integer> boundingInfo = core.calculateBoundingBox("" + csquarecode[0], currentSpeciesBoundingBoxInfo.get("$pass_NS"), currentSpeciesBoundingBoxInfo.get("$pass_N"), currentSpeciesBoundingBoxInfo.get("$pass_S"), AquamapsAlgorithmCore.getElement(csquarecode, 12),// centerlat
AquamapsAlgorithmCore.getElement(csquarecode, 13),// centerlong
AquamapsAlgorithmCore.getElement(csquarecode, 14),// faoaream
currentSpeciesBoundingBoxInfo.get("$paramData_NMostLat"), currentSpeciesBoundingBoxInfo.get("$paramData_SMostLat"), currentSpeciesBoundingBoxInfo.get("$paramData_WMostLong"), currentSpeciesBoundingBoxInfo.get("$paramData_EMostLong"), currentFAOAreas, currentSpeciesBoundingBoxInfo.get("$northern_hemisphere_adjusted"), currentSpeciesBoundingBoxInfo.get("$southern_hemisphere_adjusted"));
return boundingInfo;
}
// initializes currentFAOAreas and currentSpeciesBoundingBoxInfo
public void getBoundingBoxInformation(Object[] speciesInfoRow, Object[] speciesObservations) {
Object[] row = speciesInfoRow;
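// Column indices below follow the order of selectAllSpeciesQuery: 28=nmostlat,
// 29=smostlat, 30=wmostlong, 31=emostlong, 32=faoareas (index 33, speciesid, is
// the one used by getMainInfoID).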
String $paramData_NMostLat = AquamapsAlgorithmCore.getElement(row, 28);
String $paramData_SMostLat = AquamapsAlgorithmCore.getElement(row, 29);
String $paramData_WMostLong = AquamapsAlgorithmCore.getElement(row, 30);
String $paramData_EMostLong = AquamapsAlgorithmCore.getElement(row, 31);
currentFAOAreas = AquamapsAlgorithmCore.getElement(row, 32);
// adjust FAO areas
currentFAOAreas = core.procFAO_2050(currentFAOAreas);
// get Bounding Box Information
// System.out.println("TYPE:"+type);
currentSpeciesBoundingBoxInfo = core.getBoundingBoxInfo($paramData_NMostLat, $paramData_SMostLat, $paramData_WMostLong, $paramData_EMostLong, speciesObservations, type);
// end of get BoundingBoxInformation
}
// DATABASE INTERACTION
public static void writeOnDB(List<String> buffer, String destinationTable, SessionFactory dbHibConnection) {
int endIndex = buffer.size();
if (endIndex > 0) {
System.out.println("\tWriting Buffer is not empty: "+endIndex);
String $probabilityInsertionStatement = AquamapsSuitableFunctions.probabilityInsertionStatement.replace("%ADDEDINFORMATION%", ","+metainfo);
StringBuffer sb = new StringBuffer();
// System.out.println("writeOnDB()->PROBABILITIES BUFFER SIZE DELETION");
for (int i = 0; i < endIndex; i++) {
sb.append("(" + buffer.get(i) + ")");
if (i < endIndex - 1) {
sb.append(",");
}
}
String insertionString = String.format($probabilityInsertionStatement, destinationTable, sb.toString());
try {
// System.out.println(insertionString);
DatabaseFactory.executeSQLUpdate(insertionString, dbHibConnection);
} catch (Exception e) {
e.printStackTrace();
}
}
else
System.out.println("\tWarning : writing buffer is empty!");
System.out.println("\tWriting on DB FINISHED");
}
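// Illustrative shape of the statement assembled above (table name and values are placeholders):
//   insert into <DistributionTable> (speciesid,csquarecode,probability ,boundboxyn, faoareayn, faoaream, eezall, lme)
//   VALUES ('<speciesid>','<csquarecode>',0.92,'1','1','27','NOR','22'),('<speciesid>','<csquarecode>',0.75,...)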
//FILES MANAGEMENT
public void dumpAll(String path) throws Exception {
Transformations.dumpObjectToFile(path + configFile, currentconfig);
// Transformations.dumpObjectToFile(path + csquaresFile, environmentVectors);
}
public void rebuildConfig(String configFile) throws Exception{
FileInputStream fis = new FileInputStream(new File(configFile));
currentconfig = (AlgorithmConfiguration) new XStream().fromXML(fis);
fis.close();
}
// once the files have been uploaded, they are available locally on the node
public void rebuildAll(int cellOrdinal, int chunksize, int speciesOrdinal, int speciesChunkSize, String pathToFiles) throws Exception {
// currentconfig = (AlgorithmConfiguration) Transformations.getObjectFromFile(pathToFiles+configFile);
/*
try{
environmentVectors = (List<Object>) Transformations.getObjectFromFile(pathToFiles+csquaresFile);
}catch(Exception e){
System.out.println("\tError in retrieving environmental vectors");
}
*/
}
public String getAdditionalInformation(Object species, Object area) {
Object[] arearray = (Object[]) area;
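// Indices 14-16 of the csquare row are faoaream, eezall and lme (see csquareCodeQuery);
// together with the two bounding-box flags they fill the metainfo columns.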
HashMap<String, Integer> boundingInfo = calculateBoundingBox(arearray);
String addedInformation = "'" + boundingInfo.get("$InBox") + "','" + boundingInfo.get("$InFAO") + "','" + arearray[14] + "','" + arearray[15] + "','" + arearray[16] + "'";
return addedInformation;
}
//AUXILIARY FUNCTIONS
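// getMainInfoID extracts column 33 (speciesid, the last field of selectAllSpeciesQuery);
// getGeographicalID extracts column 0 (csquarecode) of an environmental row.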
public static String getMainInfoID(Object speciesInfo) {
String s = "" + ((Object[]) speciesInfo)[33];
return s;
}
public static String getGeographicalID(Object geoInfo) {
String s = "" + ((Object[]) geoInfo)[0];
return s;
}
}
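
The class above is a toolbox rather than a runnable algorithm; the driving node (AquamapsSuitableNode, only partially shown in this diff) feeds it species and cell rows. The sketch below shows one plausible call sequence for a single species/cell pair. It is illustrative only: the sketch's class and method names, the "native" type string and the quoting of the value row are assumptions, while every AquamapsSuitableFunctions call matches a signature defined above.

import java.util.ArrayList;
import java.util.List;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsAlgorithmCore;
import org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitableFunctions;
import org.hibernate.SessionFactory;

public class AquamapsSuitableUsageSketch {
    // speciesRow comes from selectAllSpeciesQuery, areaRow from csquareCodeQuery,
    // speciesObservations from selectAllSpeciesObservationQuery.
    public static void processOnePair(AquamapsAlgorithmCore core, AlgorithmConfiguration config,
            SessionFactory dbHibConnection, Object[] speciesRow, Object[] speciesObservations, Object[] areaRow) {
        AquamapsSuitableFunctions functions = new AquamapsSuitableFunctions(core, "native", config);
        // resolve FAO areas and bounding-box limits for this species
        functions.getBoundingBoxInformation(speciesRow, speciesObservations);
        float prob = functions.calcProb(speciesRow, areaRow);
        if (prob > 0.1) {
            // boundboxyn, faoareayn, faoaream, eezall, lme for this cell
            String added = functions.getAdditionalInformation(speciesRow, areaRow);
            String row = "'" + AquamapsSuitableFunctions.getMainInfoID(speciesRow) + "','"
                    + AquamapsSuitableFunctions.getGeographicalID(areaRow) + "'," + prob + "," + added;
            List<String> buffer = new ArrayList<String>();
            buffer.add(row);
            AquamapsSuitableFunctions.writeOnDB(buffer, config.getParam("DistributionTable"), dbHibConnection);
        }
    }
}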

Some files were not shown because too many files have changed in this diff.