Branching to create a new version that depends on the new SmartExecutor
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngineSmartExecutor@112013 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in commit 8c8d1c3167
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
  <classpathentry kind="src" output="target/classes" path="src/main/java"/>
  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
  <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"/>
  <classpathentry kind="output" path="target/classes"/>
</classpath>
@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
  <name>EcologicalEngineExecutor</name>
  <comment></comment>
  <projects>
  </projects>
  <buildSpec>
    <buildCommand>
      <name>org.eclipse.jdt.core.javabuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
    <buildCommand>
      <name>org.eclipse.m2e.core.maven2Builder</name>
      <arguments>
      </arguments>
    </buildCommand>
  </buildSpec>
  <natures>
    <nature>org.eclipse.m2e.core.maven2Nature</nature>
    <nature>org.eclipse.jdt.core.javanature</nature>
  </natures>
</projectDescription>
@@ -0,0 +1,4 @@
#Fri Jun 22 18:05:41 CEST 2012
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding/<project>=UTF-8
@@ -0,0 +1,13 @@
#Fri Jun 22 18:05:41 CEST 2012
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.6
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.6
@@ -0,0 +1,5 @@
#Fri Jun 22 17:51:31 CEST 2012
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
@@ -0,0 +1,17 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
  <session-factory>
    <property name="connection.driver_class">org.postgresql.Driver</property>
    <property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
    <property name="connection.url">jdbc:postgresql://localhost/testdb</property>
    <property name="connection.username">gcube</property>
    <property name="connection.password">d4science2</property>
    <property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
    <property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
    <property name="c3p0.timeout">0</property>
    <property name="c3p0.max_size">1</property>
    <property name="c3p0.max_statements">0</property>
    <property name="c3p0.min_size">1</property>
    <property name="current_session_context_class">thread</property>
  </session-factory>
</hibernate-configuration>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,5 @@
#!/bin/sh
# AQUAMAPS_SUITABLE
cd $1

java -Xmx1024M -classpath ./:./aquamapsnode.jar:./c3p0-0.9.1.2.jar:./commons-collections-3.1.jar:./dom4j-1.6.1.jar:./ecologicalDataMining.jar:./hibernate3.jar:./jaxen-1.1.2.jar:./jta-1.1.jar:./log4j-1.2.16.jar:./postgresql-8.4-702.jdbc4.jar:./slf4j-api-1.6.0.jar:./slf4j-log4j12-1.6.0.jar:./xpp3_min-1.1.4c.jar:./xstream-1.3.1.jar org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitableNode $2 execution.output
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@@ -0,0 +1,696 @@
|
|||
##--------------------------------------------------------
|
||||
## CMSY analysis with estimation of total biomass, including Bayesian Schaefer
|
||||
## written by Rainer Froese with support from Gianpaolo Coro in 2013-2014
|
||||
## This version adjusts biomass to average biomass over the year
|
||||
## It also contains the FutureCrash option to improve prediction of final biomass
|
||||
## Version 21 adds the purple point to indicate the 25th percentile of final biomass
|
||||
## Version 22 accepts that no biomass or CPUE data are available
|
||||
##--------------------------------------------------------
|
||||
library(R2jags) # Interface with JAGS
|
||||
library(coda)
|
||||
|
||||
#-----------------------------------------
|
||||
# Some general settings
|
||||
#-----------------------------------------
|
||||
# set.seed(999) # use for comparing results between runs
|
||||
rm(list=ls(all=TRUE)) # clear previous variables etc
|
||||
options(digits=3) # displays all numbers with three significant digits as default
|
||||
graphics.off() # close graphics windows from previous sessions
|
||||
|
||||
#-----------------------------------------
|
||||
# General settings for the analysis
|
||||
#-----------------------------------------
|
||||
sigR <- 0.02 # overall process error; 0.05 works reasonably well for simulations, 0.02 for real data; 0 if deterministic model
|
||||
n <- 10000 # initial number of r-k pairs
|
||||
batch.mode <- T # set to TRUE to suppress graphs
|
||||
write.output <- T # set to true if table of output is wanted
|
||||
FutureCrash <- "No"
|
||||
|
||||
#-----------------------------------------
|
||||
# Start output to screen
|
||||
#-----------------------------------------
|
||||
cat("-------------------------------------------\n")
|
||||
cat("Catch-MSY Analysis,", date(),"\n")
|
||||
cat("-------------------------------------------\n")
|
||||
|
||||
#------------------------------------------
|
||||
# Read data and assign to vectors
|
||||
#------------------------------------------
|
||||
# filename_1 <- "AllStocks_Catch4.csv"
|
||||
# filename_2 <- "AllStocks_ID4.csv"
|
||||
# filename_1 <- "SimCatch.csv"
|
||||
# filename_2 <- "SimSpec.csv"
|
||||
# filename_2 <- "SimSpecWrongS.csv"
|
||||
# filename_2 <- "SimSpecWrongI.csv"
|
||||
# filename_2 <- "SimSpecWrongF.csv"
|
||||
# filename_2 <- "SimSpecWrongH.csv"
|
||||
# filename_2 <- "SimSpecWrongL.csv"
|
||||
# filename_1 <- "FishDataLim.csv"
|
||||
# filename_2 <- "FishDataLimSpec.csv"
|
||||
filename_1 <- "WKLIFE4Stocks.csv"
|
||||
filename_2 <- "WKLIFE4ID.csv"
|
||||
|
||||
outfile<-"outfile"
|
||||
outfile.txt <- "outputfile.txt"
|
||||
|
||||
cdat <- read.csv(filename_1, header=T, dec=".", stringsAsFactors = FALSE)
|
||||
cinfo <- read.csv(filename_2, header=T, dec=".", stringsAsFactors = FALSE)
|
||||
cat("Files", filename_1, ",", filename_2, "read successfully","\n")
|
||||
|
||||
# Stocks with total biomass data and catch data from StartYear to EndYear
|
||||
# stocks <- sort(as.character(cinfo$stock)) # All stocks
|
||||
stocks<-"HLH_M07"
|
||||
|
||||
# select one stock after the other
|
||||
for(stock in stocks) {
|
||||
# assign data from cinfo to vectors
|
||||
res <- as.character(cinfo$Resilience[cinfo$stock==stock])
|
||||
StartYear <- as.numeric(cinfo$StartYear[cinfo$stock==stock])
|
||||
EndYear <- as.numeric(cinfo$EndYear[cinfo$stock==stock])
|
||||
r_low <- as.numeric(cinfo$r_low[cinfo$stock==stock])
|
||||
r_hi <- as.numeric(cinfo$r_hi[cinfo$stock==stock])
|
||||
stb_low <- as.numeric(cinfo$stb_low[cinfo$stock==stock])
|
||||
stb_hi <- as.numeric(cinfo$stb_hi[cinfo$stock==stock])
|
||||
intyr <- as.numeric(cinfo$intyr[cinfo$stock==stock])
|
||||
intbio_low <- as.numeric(cinfo$intbio_low[cinfo$stock==stock])
|
||||
intbio_hi <- as.numeric(cinfo$intbio_hi[cinfo$stock==stock])
|
||||
endbio_low <- as.numeric(cinfo$endbio_low[cinfo$stock==stock])
|
||||
endbio_hi <- as.numeric(cinfo$endbio_hi[cinfo$stock==stock])
|
||||
Btype <- as.character(cinfo$Btype[cinfo$stock==stock])
|
||||
FutureCrash <- as.character(cinfo$FutureCrash[cinfo$stock==stock])
|
||||
comment <- as.character(cinfo$comment[cinfo$stock==stock])
|
||||
|
||||
|
||||
# extract data on stock
|
||||
yr <- as.numeric(cdat$yr[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])
|
||||
ct <- as.numeric(cdat$ct[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that catch is given in tonnes, transforms to '000 tonnes
|
||||
if(Btype=="observed" | Btype=="CPUE" | Btype=="simulated") {
|
||||
bt <- as.numeric(cdat$TB[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that biomass is in tonnes, transforms to '000 tonnes
|
||||
} else {bt <- NA}
|
||||
nyr <- length(yr) # number of years in the time series
|
||||
|
||||
|
||||
if(Btype!="observed") {bio <- bt}
|
||||
# change biomass to moving average as assumed by Schaefer (but not for simulations or CPUE)
|
||||
# for last year use reported bio
|
||||
if(Btype=="observed") {
|
||||
ma <- function(x){filter(x,rep(1/2,2),sides=2)}
|
||||
bio <- ma(bt)
|
||||
bio[length(bio)] <- bt[length(bt)] }
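# Added note (not in the original script): filter(x, rep(1/2,2), sides=2) returns
# (x[t]+x[t+1])/2, i.e. the mean of the biomass at the start of year t and of year t+1,
# which approximates the average biomass over year t; the last element comes out as NA,
# so it is replaced with the reported biomass of the final year above.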
|
||||
|
||||
# initialize vectors for viable r, k, bt
|
||||
rv.all <- vector()
|
||||
kv.all <- vector()
|
||||
btv.all <- matrix(data=vector(),ncol=nyr+1)
|
||||
|
||||
|
||||
|
||||
#----------------------------------------------------
|
||||
# Determine initial ranges for parameters and biomass
|
||||
#----------------------------------------------------
|
||||
# initial range of r from input file
|
||||
if(is.na(r_low)==F & is.na(r_hi)==F) {
|
||||
start_r <- c(r_low,r_hi)
|
||||
} else {
|
||||
# initial range of r and CatchMult values based on resilience
|
||||
if(res == "High") {
|
||||
start_r <- c(0.6,1.5)} else if(res == "Medium") {
|
||||
start_r <- c(0.2,0.8)} else if(res == "Low") {
|
||||
start_r <- c(0.05,0.5)} else { # i.e. res== "Very low"
|
||||
start_r <- c(0.015,0.1)}
|
||||
}
|
||||
|
||||
|
||||
# initial range of k values, assuming k will always be larger than max catch
|
||||
# and max catch will never be smaller than a quarter of MSY
|
||||
|
||||
start_k <- c(max(ct),16*max(ct)/start_r[1])
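# Sketch of the reasoning behind the bounds above (added for clarity, not part of the
# original script): for the Schaefer model MSY = r*k/4, so k = 4*MSY/r. If the maximum
# catch is never smaller than MSY/4, then MSY <= 4*max(ct) and hence k <= 16*max(ct)/r;
# evaluating at the smallest plausible r (start_r[1]) gives the widest upper bound,
# while k must at least exceed the largest observed catch.
# Illustrative check with hypothetical numbers (not data from this analysis):
# max(ct) = 100, start_r[1] = 0.2  ->  start_k = c(100, 16*100/0.2) = c(100, 8000)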
|
||||
|
||||
# initial biomass range from input file
|
||||
if(is.na(stb_low)==F & is.na(stb_hi)==F) {
|
||||
startbio <- c(stb_low,stb_hi)
|
||||
} else {
|
||||
# use low biomass at start as default
|
||||
startbio <- c(0.1,0.5)
|
||||
}
|
||||
|
||||
MinYear <- yr[which.min(ct)]
|
||||
MaxYear <- yr[which.max(ct)]
|
||||
# use year and biomass range for intermediate biomass from input file
|
||||
if(is.na(intbio_low)==F & is.na(intbio_hi)==F) {
|
||||
intyr <- intyr
|
||||
intbio <- c(intbio_low,intbio_hi)
|
||||
# else if year of minimum catch is at least 3 years away from StartYear and EndYear of series, use min catch
|
||||
} else if((MinYear - StartYear) > 3 & (EndYear - MinYear) > 3 ) {
|
||||
# assume that biomass range in year before minimum catch was 0.01 - 0.4
|
||||
intyr <- MinYear-1
|
||||
intbio <- c(0.01,0.4)
|
||||
# else if year of max catch is at least 3 years away from StartYear and EndYear of series, use max catch
|
||||
} else if((MaxYear - StartYear) > 3 & (EndYear - MaxYear) > 3 ) {
|
||||
# assume that biomass range in year before maximum catch was 0.3 - 0.9
|
||||
intyr <- MaxYear-1
|
||||
intbio <- c(0.3,0.9)
|
||||
} else {
|
||||
# assume uninformative range 0-1 in mid-year
|
||||
intyr <- as.integer(mean(c(StartYear, EndYear)))
|
||||
intbio <- c(0,1) }
|
||||
# end of intbio setting
|
||||
|
||||
# final biomass range from input file
|
||||
if(is.na(endbio_low)==F & is.na(endbio_hi)==F) {
|
||||
endbio <- c(endbio_low,endbio_hi)
|
||||
} else {
|
||||
# else use Catch/maxCatch to estimate final biomass
|
||||
endbio <- if(ct[nyr]/max(ct) > 0.5) {c(0.4,0.8)} else {c(0.01,0.4)}
|
||||
} # end of final biomass setting
|
||||
|
||||
|
||||
#----------------------------------------------
|
||||
# MC with Schaefer Function filtering
|
||||
#----------------------------------------------
|
||||
Schaefer <- function(ri, ki, startbio, intyr, intbio, endbio, sigR, pt) {
|
||||
|
||||
# if stock is not expected to crash within 3 years if last catch continues
|
||||
if(FutureCrash == "No") {
|
||||
yr.s <- c(yr,EndYear+1,EndYear+2,EndYear+3)
|
||||
ct.s <- c(ct,ct[yr==EndYear],ct[yr==EndYear],ct[yr==EndYear])
|
||||
nyr.s <- length(yr.s)
|
||||
} else{
|
||||
yr.s <- yr
|
||||
ct.s <- ct
|
||||
nyr.s <- nyr
|
||||
}
|
||||
|
||||
# create vector for initial biomasses
|
||||
startbt <-seq(from =startbio[1], to=startbio[2], by = (startbio[2]-startbio[1])/10)
|
||||
# create vectors for viable r, k and bt
|
||||
rv <- array(-1:-1,dim=c(length(ri)*length(startbt))) #initialize array with -1. The -1 remaining after the process will be removed
|
||||
kv <- array(-1:-1,dim=c(length(ri)*length(startbt)))
|
||||
btv <- matrix(data=NA, nrow = (length(ri)*length(startbt)), ncol = nyr+1)
|
||||
intyr.i <- which(yr.s==intyr) # get index of intermediate year
|
||||
|
||||
#loop through r-k pairs
|
||||
npoints = length(ri)
|
||||
nstartb = length(startbt)
|
||||
|
||||
for(i in 1 : npoints) {
|
||||
if (i%%1000==0)
|
||||
cat(".")
|
||||
|
||||
# create empty vector for annual biomasses
|
||||
bt <- vector()
|
||||
|
||||
# loop through range of relative start biomasses
|
||||
for(j in startbt) {
|
||||
# set initial biomass, including process error
|
||||
bt[1]=j*ki[i]*exp(rnorm(1,0, sigR)) ## set biomass in first year
|
||||
|
||||
#loop through years in catch time series
|
||||
for(t in 1:nyr.s) { # for all years in the time series
|
||||
xt=rnorm(1,0, sigR) # set new random process error for every year
|
||||
|
||||
# calculate biomass as function of previous year's biomass plus surplus production minus catch
|
||||
bt[t+1]=(bt[t]+ri[i]*bt[t]*(1-bt[t]/ki[i])-ct.s[t])*exp(xt)
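# Added note: this is the Schaefer surplus-production update
# B[t+1] = (B[t] + r*B[t]*(1 - B[t]/k) - C[t]) * exp(eps),
# with multiplicative lognormal process error eps ~ N(0, sigR).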
|
||||
|
||||
# if biomass < 0.01 k or > 1.1 k, discard r-k pair
|
||||
if(bt[t+1] < 0.01*ki[i] || bt[t+1] > 1.1*ki[i]) { break } # stop looping through years, go to next upper level
|
||||
|
||||
if ((t+1)==intyr.i && (bt[t+1]>(intbio[2]*ki[i]) || bt[t+1]<(intbio[1]*ki[i]))) { break } #intermediate year check
|
||||
|
||||
} # end of loop of years
|
||||
|
||||
# if last biomass falls outside the expected range, go to next r-k pair
|
||||
if(t < nyr.s || bt[yr.s==EndYear] > (endbio[2]*ki[i]) || bt[yr.s==EndYear] < (endbio[1]*ki[i])) {
|
||||
next } else {
|
||||
# store r, k, and bt, plot point, then go to next startbt
|
||||
rv[((i-1)*nstartb)+j] <- ri[i]
|
||||
kv[((i-1)*nstartb)+j] <- ki[i]
|
||||
btv[((i-1)*nstartb)+j,] <- bt[1:(nyr+1)]/ki[i] #substitute a row into the matrix, exclude FutureCrash years
|
||||
if(pt==T) {points(x=ri[i], y=ki[i], pch=".", cex=2, col="black")
|
||||
next }
|
||||
}
|
||||
} # end of loop of initial biomasses
|
||||
} # end of loop of r-k pairs
|
||||
|
||||
rv=rv[rv!=-1]
|
||||
kv=kv[kv!=-1]
|
||||
btv=na.omit(btv) # drop rows that were never filled (still all NA)
|
||||
|
||||
cat("\n")
|
||||
return(list(rv, kv,btv))
|
||||
} # end of Schaefer function
|
||||
|
||||
#------------------------------------------------------------------
|
||||
# Uniform sampling of the r-k space
|
||||
#------------------------------------------------------------------
|
||||
# get random set of r and k from log space distribution
|
||||
ri1 = exp(runif(n, log(start_r[1]), log(start_r[2])))
|
||||
ki1 = exp(runif(n, log(start_k[1]), log(start_k[2])))
|
||||
|
||||
#-----------------------------------------------------------------
|
||||
# Plot data and progress
|
||||
#-----------------------------------------------------------------
|
||||
#windows(14,9)
|
||||
par(mfcol=c(2,3))
|
||||
# plot catch
|
||||
plot(x=yr, y=ct, ylim=c(0,1.2*max(ct)), type ="l", bty="l", main=paste(stock,"catch"), xlab="Year",
|
||||
ylab="Catch", lwd=2)
|
||||
points(x=yr[which.max(ct)], y=max(ct), col="red", lwd=2)
|
||||
points(x=yr[which.min(ct)], y=min(ct), col="red", lwd=2)
|
||||
|
||||
# plot r-k graph
|
||||
plot(ri1, ki1, xlim = start_r, ylim = start_k, log="xy", xlab="r", ylab="k", main="Finding viable r-k", pch=".", cex=2, bty="l", col="lightgrey")
|
||||
|
||||
#1 - Call MC-Schaefer function to preliminarily explore the space without prior information
|
||||
cat(stock, ": First Monte Carlo filtering of r-k space with ",n," points\n")
|
||||
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
|
||||
rv.all <- append(rv.all,MCA[[1]])
|
||||
kv.all <- append(kv.all,MCA[[2]])
|
||||
btv.all <- rbind(btv.all,MCA[[3]])
|
||||
#take viable r and k values
|
||||
nviablepoints = length(rv.all)
|
||||
cat("* Found ",nviablepoints," viable points from ",n," samples\n");
|
||||
|
||||
|
||||
#if few points were found then resample and shrink the k log space
|
||||
if (nviablepoints<=1000){
|
||||
log.start_k.new <- log(start_k)
|
||||
max_attempts = 3
|
||||
current_attempts = 1
|
||||
while (nviablepoints<=1000 && current_attempts<=max_attempts){
|
||||
if(nviablepoints > 0) {
|
||||
log.start_k.new[1] <- mean(c(log.start_k.new[1], min(log(kv.all))))
|
||||
log.start_k.new[2] <- mean(c(log.start_k.new[2], max(log(kv.all)))) }
|
||||
n.new=n*current_attempts #add more points
|
||||
ri1 = exp(runif(n.new, log(start_r[1]), log(start_r[2])))
|
||||
ki1 = exp(runif(n.new, log.start_k.new[1], log.start_k.new[2]))
|
||||
cat("Shrinking k space: repeating Monte Carlo in the interval [",exp(log.start_k.new[1]),",",exp(log.start_k.new[2]),"]\n")
|
||||
cat("Attempt ",current_attempts," of ",max_attempts," with ",n.new," points","\n")
|
||||
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
|
||||
rv.all <- append(rv.all,MCA[[1]])
|
||||
kv.all <- append(kv.all,MCA[[2]])
|
||||
btv.all <- rbind(btv.all,MCA[[3]])
|
||||
nviablepoints = length(rv.all) #recalculate viable points
|
||||
cat("* Found altogether",nviablepoints," viable points \n");
|
||||
current_attempts=current_attempts+1 #increment the number of attempts
|
||||
}
|
||||
}
|
||||
|
||||
# If tip of viable r-k pairs is 'thin', do extra sampling there
|
||||
gm.rv = exp(mean(log(rv.all)))
|
||||
if(length(rv.all[rv.all > 0.9*start_r[2]]) < 10) {
|
||||
l.sample.r <- (gm.rv + max(rv.all))/2
|
||||
cat("Final sampling in the tip area above r =",l.sample.r,"\n")
|
||||
log.start_k.new <- c(log(0.8*min(kv.all)),log(max(kv.all[rv.all > l.sample.r])))
|
||||
ri1 = exp(runif(50000, log(l.sample.r), log(start_r[2])))
|
||||
ki1 = exp(runif(50000, log.start_k.new[1], log.start_k.new[2]))
|
||||
MCA <- Schaefer(ri=ri1, ki=ki1, startbio=startbio, intyr=intyr, intbio=intbio, endbio=endbio, sigR=sigR, pt=T)
|
||||
rv.all <- append(rv.all,MCA[[1]])
|
||||
kv.all <- append(kv.all,MCA[[2]])
|
||||
btv.all <- rbind(btv.all,MCA[[3]])
|
||||
nviablepoints = length(rv.all) #recalculate viable points
|
||||
cat("Found altogether", length(rv.all), "unique viable r-k pairs and biomass trajectories\n")
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Bayesian analysis of catch & biomass with Schaefer model
|
||||
# ------------------------------------------------------------
|
||||
if(Btype == "observed" | Btype=="simulated") {
|
||||
cat("Running Schaefer MCMC analysis....\n")
|
||||
mcmc.burn <- as.integer(30000)
|
||||
mcmc.chainLength <- as.integer(60000) # burn-in plus post-burn
|
||||
mcmc.thin = 10 # to reduce autocorrelation
|
||||
mcmc.chains = 3 # needs to be at least 2 for DIC
|
||||
|
||||
# Parameters to be returned by JAGS
|
||||
jags.save.params=c('r','k','sigma.b', 'alpha', 'sigma.r') #
|
||||
|
||||
# JAGS model
|
||||
Model = "model{
|
||||
# to avoid crash due to 0 values
|
||||
eps<-0.01
|
||||
# set a quite narrow variation from the expected value
|
||||
sigma.b <- 1/16
|
||||
tau.b <- pow(sigma.b,-2)
|
||||
|
||||
Bm[1] <- log(alpha*k)
|
||||
bio[1] ~ dlnorm(Bm[1],tau.b)
|
||||
|
||||
|
||||
for (t in 2:nyr){
|
||||
bio[t] ~ dlnorm(Bm[t],tau.b)
|
||||
Bm[t] <- log(max(bio[t-1] + r*bio[t-1]*(1 - (bio[t-1])/k) - ct[t-1], eps))
|
||||
}
|
||||
|
||||
# priors
|
||||
alpha ~ dunif(0.01,1) # needed for fit of first biomass
|
||||
#inverse cubic root relationship between the range of viable r and the size of the search space
inverseRangeFactor <- 1/((start_r[2]-start_r[1])^(1/3))
|
||||
|
||||
# give sigma some variability in the inverse relationship
|
||||
sigma.r ~ dunif(0.001*inverseRangeFactor,0.02*inverseRangeFactor)
|
||||
tau.r <- pow(sigma.r,-2)
|
||||
rm <- log((start_r[1]+start_r[2])/2)
|
||||
r ~ dlnorm(rm,tau.r)
|
||||
|
||||
# search in the k space from the center of the range. Allow high variability
|
||||
km <- log((start_k[1]+start_k[2])/2)
|
||||
tau.k <- pow(km,-2)
|
||||
k ~ dlnorm(km,tau.k)
|
||||
|
||||
#end model
|
||||
}"
|
||||
|
||||
# Write JAGS model to file
|
||||
cat(Model, file="r2jags.bug")
|
||||
|
||||
### random seed
|
||||
set.seed(runif(1,1,500)) # needed in JAGS
|
||||
|
||||
### run model
|
||||
jags_outputs <- jags(data=c('ct','bio','nyr', 'start_r', 'start_k'),
|
||||
working.directory=NULL, inits=NULL,
|
||||
parameters.to.save= jags.save.params,
|
||||
model.file="r2jags.bug", n.chains = mcmc.chains,
|
||||
n.burnin = mcmc.burn, n.thin = mcmc.thin, n.iter = mcmc.chainLength,
|
||||
refresh=mcmc.burn/20, )
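# Optional convergence check (a sketch, not part of the original analysis): R2jags
# stores Rhat and effective sample sizes per parameter in BUGSoutput$summary; Rhat
# values close to 1 suggest that the three chains have mixed. Uncomment to inspect:
# print(jags_outputs$BUGSoutput$summary[, c("Rhat", "n.eff")])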
|
||||
|
||||
# ------------------------------------------------------
|
||||
# Results from JAGS Schaefer
|
||||
# ------------------------------------------------------
|
||||
r_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$r))
|
||||
k_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$k))
|
||||
## sigma_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$sigma.b))
|
||||
alpha_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$alpha))
|
||||
## sigma.r_out <- as.numeric(mcmc(jags_outputs$BUGSoutput$sims.list$sigma.r))
|
||||
|
||||
mean.log.r.jags <- mean(log(r_out))
|
||||
SD.log.r.jags <- sd(log(r_out))
|
||||
lcl.log.r.jags <- mean.log.r.jags-1.96*SD.log.r.jags
|
||||
ucl.log.r.jags <- mean.log.r.jags+1.96*SD.log.r.jags
|
||||
gm.r.jags <- exp(mean.log.r.jags)
|
||||
lcl.r.jags <- exp(lcl.log.r.jags)
|
||||
ucl.r.jags <- exp(ucl.log.r.jags)
|
||||
mean.log.k.jags <- mean(log(k_out))
|
||||
SD.log.k.jags <- sd(log(k_out))
|
||||
lcl.log.k.jags <- mean.log.k.jags-1.96*SD.log.k.jags
|
||||
ucl.log.k.jags <- mean.log.k.jags+1.96*SD.log.k.jags
|
||||
gm.k.jags <- exp(mean.log.k.jags)
|
||||
lcl.k.jags <- exp(lcl.log.k.jags)
|
||||
ucl.k.jags <- exp(ucl.log.k.jags)
|
||||
mean.log.MSY.jags<- mean(log(r_out)+log(k_out)-log(4))
|
||||
SD.log.MSY.jags <- sd(log(r_out)+log(k_out)-log(4))
|
||||
gm.MSY.jags <- exp(mean.log.MSY.jags)
|
||||
lcl.MSY.jags <- exp(mean.log.MSY.jags-1.96*SD.log.MSY.jags)
|
||||
ucl.MSY.jags <- exp(mean.log.MSY.jags+1.96*SD.log.MSY.jags)
|
||||
|
||||
} # end of MCMC Schaefer loop
|
||||
|
||||
#------------------------------------
|
||||
# get results from CMSY
|
||||
#------------------------------------
|
||||
# get estimate of most probable r as median of mid log.r-classes above cut-off
|
||||
# get remaining viable log.r and log.k
|
||||
rem.log.r <- log(rv.all[rv.all > gm.rv])
|
||||
rem.log.k <- log(kv.all[rv.all>gm.rv])
|
||||
# get vectors with numbers of r and mid values in about 25 classes
|
||||
hist.log.r <- hist(x=rem.log.r, breaks=25, plot=F)
|
||||
log.r.counts <- hist.log.r$counts
|
||||
log.r.mids <- hist.log.r$mids
|
||||
# get most probable log.r as median of mids with counts > 0
|
||||
log.r.est <- median(log.r.mids[which(log.r.counts > 0)])
|
||||
lcl.log.r <- as.numeric(quantile(x=log.r.mids[which(log.r.counts > 0)], 0.025))
|
||||
ucl.log.r <- as.numeric(quantile(x=log.r.mids[which(log.r.counts > 0)], 0.975))
|
||||
r.est <- exp(log.r.est)
|
||||
lcl.r.est <- exp(lcl.log.r)
|
||||
ucl.r.est <- exp(ucl.log.r)
|
||||
|
||||
# do linear regression of log k ~ log r with slope fixed to -1 (from Schaefer)
|
||||
reg <- lm(rem.log.k ~ 1 + offset(-1*rem.log.r))
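# Why the slope is fixed at -1 (added note): for the Schaefer model MSY = r*k/4,
# i.e. log(k) = log(4*MSY) - log(r), so if MSY is roughly constant across the viable
# pairs, log(k) declines with log(r) with slope -1. The offset() term imposes that
# slope, leaving lm() to estimate only the intercept log(4*MSY).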
|
||||
int.reg <- as.numeric(reg[1])
|
||||
sd.reg <- sd(resid(reg))
|
||||
se.reg <- summary(reg)$coefficients[2]
|
||||
# get estimate of log(k) from y where x = log.r.est
|
||||
log.k.est <- int.reg + (-1) * log.r.est
|
||||
# get estimates of CL of log.k.est from y +/- SD where x = lcl.log r or ucl.log.r
|
||||
lcl.log.k <- int.reg + (-1) * ucl.log.r - sd.reg
|
||||
ucl.log.k <- int.reg + (-1) * lcl.log.r + sd.reg
|
||||
k.est <- exp(log.k.est)
|
||||
lcl.k.est <- exp(lcl.log.k)
|
||||
ucl.k.est <- exp(ucl.log.k)
|
||||
|
||||
# get MSY from remaining log r-k pairs
|
||||
log.MSY.est <- mean(rem.log.r + rem.log.k - log(4))
|
||||
sd.log.MSY.est <- sd(rem.log.r + rem.log.k - log(4))
|
||||
lcl.log.MSY.est <- log.MSY.est - 1.96*sd.log.MSY.est
|
||||
ucl.log.MSY.est <- log.MSY.est + 1.96*sd.log.MSY.est
|
||||
MSY.est <- exp(log.MSY.est)
|
||||
lcl.MSY.est <- exp(lcl.log.MSY.est)
|
||||
ucl.MSY.est <- exp(ucl.log.MSY.est)
|
||||
|
||||
# get predicted biomass vectors as median and quantiles of trajectories
|
||||
median.btv <- apply(btv.all,2, median)
|
||||
lastyr.bio <- median.btv[length(median.btv)-1]
|
||||
nextyr.bio <- median.btv[length(median.btv)]
|
||||
lcl.btv <- apply(btv.all,2, quantile, probs=0.025)
|
||||
q.btv <- apply(btv.all,2, quantile, probs=0.25)
|
||||
ucl.btv <- apply(btv.all,2, quantile, probs=0.975)
|
||||
lcl.lastyr.bio <- lcl.btv[length(lcl.btv)-1]
|
||||
ucl.lastyr.bio <- ucl.btv[length(lcl.btv)-1]
|
||||
lcl.nextyr.bio <- lcl.btv[length(lcl.btv)]
|
||||
ucl.nextyr.bio <- ucl.btv[length(lcl.btv)]
|
||||
|
||||
# -----------------------------------------
|
||||
# Plot results
|
||||
# -----------------------------------------
|
||||
# Analysis of viable r-k pairs
|
||||
plot(x=rv.all, y=kv.all, xlim=start_r,
|
||||
ylim=c(0.9*min(kv.all, ifelse(Btype == "observed",k_out,NA), na.rm=T), 1.1*max(kv.all)),
|
||||
pch=16, col="grey",log="xy", bty="l",
|
||||
xlab="r", ylab="k", main="Analysis of viable r-k")
|
||||
abline(v=gm.rv, lty="dashed")
|
||||
|
||||
# plot points and best estimate from full Schaefer analysis
|
||||
if(Btype == "observed"|Btype=="simulated") {
|
||||
# plot r-k pairs from MCMC
|
||||
points(x=r_out, y=k_out, pch=16,cex=0.5)
|
||||
# plot best r-k pair from MCMC
|
||||
points(x=gm.r.jags, y=gm.k.jags, pch=19, col="green")
|
||||
lines(x=c(lcl.r.jags, ucl.r.jags),y=c(gm.k.jags,gm.k.jags), col="green")
|
||||
lines(x=c(gm.r.jags,gm.r.jags),y=c(lcl.k.jags, ucl.k.jags), col="green")
|
||||
}
|
||||
|
||||
# if data are from simulation, plot true r and k
|
||||
if(Btype=="simulated") {
|
||||
l.stock <- nchar(stock) # get length of sim stock name
|
||||
r.char <- substr(stock,l.stock-1,l.stock) # get last character of sim stock name
|
||||
r.sim <- NA # initialize vector for r used in simulation
|
||||
if(r.char=="_H") {r.sim=1; lcl.r.sim=0.8; ucl.r.sim=1.25} else
|
||||
if(r.char=="_M") {r.sim=0.5;lcl.r.sim=0.4;ucl.r.sim=0.62} else
|
||||
if(r.char=="_L") {r.sim=0.25;lcl.r.sim=0.2;ucl.r.sim=0.31} else {r.sim=0.05;lcl.r.sim=0.04;ucl.r.sim=0.062}
|
||||
# plot true r-k point with error bars
|
||||
points(x=r.sim, y=1000, pch=19, col="red")
|
||||
# add +/- 20% error bars
|
||||
lines(x=c(lcl.r.sim,ucl.r.sim), y=c(1000,1000), col="red")
|
||||
lines(x=c(r.sim,r.sim), y=c(800,1250), col="red")
|
||||
}
|
||||
|
||||
# plot blue dot for proposed r-k, with 95% CL lines
|
||||
points(x=r.est, y=k.est, pch=19, col="blue")
|
||||
lines(x=c(lcl.r.est, ucl.r.est),y=c(k.est,k.est), col="blue")
|
||||
lines(x=c(r.est,r.est),y=c(lcl.k.est, ucl.k.est), col="blue")
|
||||
|
||||
# plot biomass graph
|
||||
# determine k to use for red line in b/k plot
|
||||
if(Btype=="simulated") {k2use <- 1000} else
|
||||
if(Btype == "observed") {k2use <- gm.k.jags} else {k2use <- k.est}
|
||||
# determine height of y-axis in plot
|
||||
max.y <- max(c(bio/k2use,ucl.btv,0.6,startbio[2], intbio[2],endbio[2]),na.rm=T)
|
||||
|
||||
plot(x=yr,y=median.btv[1:nyr], lwd=2, xlab="Year", ylab="Relative biomass b/k", type="l",
|
||||
ylim=c(0,max.y), bty="l", main=paste("Pred. biomass vs ", Btype,sep=""))
|
||||
lines(x=yr, y=lcl.btv[1:nyr],type="l")
|
||||
lines(x=yr, y=ucl.btv[1:nyr],type="l")
|
||||
points(x=EndYear,y=q.btv[yr==EndYear], col="purple", cex=1.5, lwd=2)
|
||||
abline(h=0.5, lty="dashed")
|
||||
abline(h=0.25, lty="dotted")
|
||||
lines(x=c(yr[1],yr[1]), y=startbio, col="blue")
|
||||
lines(x=c(intyr,intyr), y=intbio, col="blue")
|
||||
lines(x=c(max(yr),max(yr)), y=endbio, col="blue")
|
||||
|
||||
# if observed biomass is available, plot red biomass line
|
||||
if(Btype == "observed"|Btype=="simulated") {
|
||||
lines(x=yr, y=bio/k2use,type="l", col="red", lwd=1)
|
||||
}
|
||||
|
||||
# if CPUE data are available, scale to predicted biomass range, plot red biomass line
|
||||
if(Btype == "CPUE") {
|
||||
par(new=T) # prepares for new plot on top of previous
|
||||
plot(x=yr, y=bio, type="l", col="red", lwd=1,
|
||||
ann=F,axes=F,ylim=c(0,1.2*max(bio, na.rm=T))) # forces this plot on top of previous one
|
||||
axis(4, col="red", col.axis="red")
|
||||
}
|
||||
|
||||
# plot yield and biomass against equilibrium surplus parabola
|
||||
max.y <-max(c(ct/MSY.est,ifelse(Btype=="observed"|Btype=="simulated",ct/gm.MSY.jags,NA),1.2),na.rm=T)
|
||||
# plot parabola
|
||||
x=seq(from=0,to=2,by=0.001)
|
||||
y=4*x-(2*x)^2
|
||||
plot(x=x, y=y, xlim=c(0,1), ylim=c(0,max.y), type="l", bty="l",xlab="Relative biomass b/k",
|
||||
ylab="Catch / MSY", main="Equilibrium curve")
|
||||
# plot catch against CMSY biomass estimates
|
||||
points(x=median.btv[1:nyr], y=ct/MSY.est, pch=16, col="grey")
|
||||
points(x=q.btv[yr==EndYear],y=ct[yr==EndYear]/MSY.est, col="purple", cex=1.5, lwd=2)
|
||||
# plot catch against observed biomass or CPUE
|
||||
if(Btype == "observed"|Btype=="simulated") {
|
||||
points(x=bio/k2use, y=ct/gm.MSY.jags, pch=16, cex=0.5)
|
||||
}
|
||||
|
||||
# plot exploitation rate u against u.msy
|
||||
# get u derived from predicted CMSY biomass
|
||||
u.CMSY <- ct/(median.btv[1:nyr]*k.est)
|
||||
u.msy.CMSY <- 1-exp(-r.est/2) # Fmsy from CMSY expressed as exploitation rate
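# Background for the line above (added note): under the Schaefer model, fishing
# mortality at MSY is Fmsy = r/2; converting this instantaneous rate to an annual
# exploitation rate uses u = 1 - exp(-F), hence u.msy = 1 - exp(-r/2).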
|
||||
# get u from observed or simulated biomass
|
||||
if(Btype == "observed"|Btype=="simulated") {
|
||||
u.bio <- ct/bio
|
||||
u.msy.bio <- 1-exp(-gm.r.jags/2)
|
||||
}
|
||||
# get u from CPUE
|
||||
if(Btype == "CPUE") {
|
||||
q=max(median.btv[1:nyr][is.na(bio)==F],na.rm=T)*k.est/max(bio,na.rm=T)
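# Added note: this treats CPUE as an index proportional to biomass. The factor q
# rescales CPUE to absolute biomass by matching the maximum of the predicted biomass
# (median.btv * k.est) to the maximum observed CPUE, so that q*bio in the following
# statement approximates biomass and ct/(q*bio) an exploitation rate.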
|
||||
u.CPUE <- ct/(q*bio)
|
||||
}
|
||||
|
||||
# determine upper bound of Y-axis
|
||||
max.y <- max(c(1.5, 1.2*u.CMSY/u.msy.CMSY,ct[yr==EndYear]/(q.btv[yr==EndYear]*k.est)/u.msy.CMSY,
|
||||
ifelse(Btype=="observed"|Btype=="simulated",max(u.bio[is.na(u.bio)==F]/u.msy.bio),0),
|
||||
na.rm=T))
|
||||
# plot u from CMSY
|
||||
plot(x=yr,y=u.CMSY/u.msy.CMSY, type="l", bty="l", ylim=c(0,max.y), xlab="Year",
|
||||
ylab="u / u_msy", main="Exploitation rate")
|
||||
abline(h=1, lty="dashed")
|
||||
points(x=EndYear,y=ct[yr==EndYear]/(q.btv[yr==EndYear]*k.est)/u.msy.CMSY, col="purple", cex=1.5, lwd=2)
|
||||
# plot u from biomass
|
||||
if(Btype == "observed"|Btype=="simulated") lines(x=yr, y=u.bio/u.msy.bio, col="red")
|
||||
# plot u from CPUE
|
||||
if(Btype == "CPUE") {
|
||||
par(new=T) # prepares for new plot on top of previous
|
||||
plot(x=yr, y=u.CPUE, type="l", col="red", ylim=c(0, 1.2*max(u.CPUE,na.rm=T)),ann=F,axes=F)
|
||||
axis(4, col="red", col.axis="red")
|
||||
}
|
||||
if(batch.mode == TRUE) {dev.off()} # close plot window
|
||||
|
||||
# ------------------------------------------
|
||||
# print input and results to screen
|
||||
cat("---------------------------------------\n")
|
||||
|
||||
cat("Species:", cinfo$ScientificName[cinfo$stock==stock], "\n")
|
||||
cat("Name and region:", cinfo$EnglishName[cinfo$stock==stock], ",", cinfo$Name[cinfo$stock==stock], "\n")
|
||||
cat("Stock:",stock,"\n")
|
||||
cat("Catch data used from years", min(yr),"-", max(yr), "\n")
|
||||
cat("Prior initial relative biomass =", startbio[1], "-", startbio[2], "\n")
|
||||
cat("Prior intermediate rel. biomass=", intbio[1], "-", intbio[2], "in year", intyr, "\n")
|
||||
cat("Prior final relative biomass =", endbio[1], "-", endbio[2], "\n")
|
||||
cat("If current catches continue, is the stock likely to crash within 3 years?",FutureCrash,"\n")
|
||||
cat("Prior range for r =", format(start_r[1],digits=2), "-", format(start_r[2],digits=2),
|
||||
", prior range for k =", start_k[1], "-", start_k[2],"\n")
|
||||
|
||||
# if data are simulated, print true r-k
|
||||
if(filename_1=="SimCatch.csv") {
|
||||
cat("True r =", r.sim, "(because input data were simulated with Schaefer model)\n")
|
||||
cat("True k = 1000 \n")
|
||||
cat("True MSY =", 1000*r.sim/4,"\n")
|
||||
cat("True biomass in last year =",bio[length(bio)],"or",bio[length(bio)]/1000,"k \n")
|
||||
cat("True mean catch / MSY ratio =", mean(ct)/(1000*r.sim/4),"\n")
|
||||
}
|
||||
# print results from full Schaefer if available
|
||||
if(Btype == "observed"|Btype=="simulated") {
|
||||
cat("Results from Bayesian Schaefer model using catch & biomass (",Btype,")\n")
|
||||
cat("MSY =", gm.MSY.jags,", 95% CL =", lcl.MSY.jags, "-", ucl.MSY.jags,"\n")
|
||||
cat("Mean catch / MSY =", mean(ct)/gm.MSY.jags,"\n")
|
||||
if(Btype != "CPUE") {
|
||||
cat("r =", gm.r.jags,", 95% CL =", lcl.r.jags, "-", ucl.r.jags,"\n")
|
||||
cat("k =", gm.k.jags,", 95% CL =", lcl.k.jags, "-", ucl.k.jags,"\n")
|
||||
}
|
||||
}
|
||||
# results of CMSY analysis
|
||||
cat("Results of CMSY analysis \n")
|
||||
cat("Altogether", nviablepoints,"unique viable r-k pairs were found \n")
|
||||
cat(nviablepoints-length(rem.log.r),"r-k pairs above the initial geometric mean of r =", gm.rv, "were analysed\n")
|
||||
cat("r =", r.est,", 95% CL =", lcl.r.est, "-", ucl.r.est,"\n")
|
||||
cat("k =", k.est,", 95% CL =", lcl.k.est, "-", ucl.k.est,"\n")
|
||||
cat("MSY =", MSY.est,", 95% CL =", lcl.MSY.est, "-", ucl.MSY.est,"\n")
|
||||
cat("Predicted biomass in last year =", lastyr.bio, "2.5th perc =", lcl.lastyr.bio,
|
||||
"97.5th perc =", ucl.lastyr.bio,"\n")
|
||||
cat("Predicted biomass in next year =", nextyr.bio, "2.5th perc =", lcl.nextyr.bio,
|
||||
"97.5th perc =", ucl.nextyr.bio,"\n")
|
||||
cat("----------------------------------------------------------\n")
|
||||
|
||||
## Write some results into outfile
|
||||
if(write.output == TRUE) {
|
||||
# write data into csv file
|
||||
output = data.frame(cinfo$ScientificName[cinfo$stock==stock], stock, StartYear, EndYear, mean(ct)*1000,
|
||||
ifelse(Btype=="observed"|Btype=="simulated",bio[length(bio)],NA), # last biomass on record
|
||||
ifelse(Btype == "observed"|Btype=="simulated",gm.MSY.jags,NA), # full Schaefer
|
||||
ifelse(Btype == "observed"|Btype=="simulated",lcl.MSY.jags,NA),
|
||||
ifelse(Btype == "observed"|Btype=="simulated",ucl.MSY.jags,NA),
|
||||
ifelse(Btype == "observed"|Btype=="simulated",gm.r.jags,NA),
|
||||
ifelse(Btype == "observed"|Btype=="simulated",lcl.r.jags,NA),
|
||||
ifelse(Btype == "observed"|Btype=="simulated",ucl.r.jags,NA),
|
||||
ifelse(Btype == "observed"|Btype=="simulated",gm.k.jags,NA),
|
||||
ifelse(Btype == "observed"|Btype=="simulated",lcl.k.jags,NA),
|
||||
ifelse(Btype == "observed"|Btype=="simulated",ucl.k.jags,NA),
|
||||
r.est, lcl.r.est, ucl.r.est, # CMSY r
|
||||
k.est, lcl.k.est, ucl.k.est, # CMSY k
|
||||
MSY.est, lcl.MSY.est, ucl.MSY.est, # CMSY MSY
|
||||
lastyr.bio, lcl.lastyr.bio, ucl.lastyr.bio, # last year bio
|
||||
nextyr.bio, lcl.nextyr.bio, ucl.nextyr.bio)# last year + 1 bio
|
||||
|
||||
write.table(output, file=outfile, append = T, sep = ",",
|
||||
dec = ".", row.names = FALSE, col.names = FALSE)
|
||||
|
||||
# write some text into text outfile.txt
|
||||
|
||||
cat("Species:", cinfo$ScientificName[cinfo$stock==stock], "\n",
|
||||
"Name:", cinfo$EnglishName[cinfo$stock==stock], "\n",
|
||||
"Region:", cinfo$Name[cinfo$stock==stock], "\n",
|
||||
"Stock:",stock,"\n",
|
||||
"Catch data used from years", min(yr),"-", max(yr),", biomass =", Btype, "\n",
|
||||
"Prior initial relative biomass =", startbio[1], "-", startbio[2], "\n",
|
||||
"Prior intermediate rel. biomass=", intbio[1], "-", intbio[2], "in year", intyr, "\n",
|
||||
"Prior final relative biomass =", endbio[1], "-", endbio[2], "\n",
|
||||
"Future crash with current catches?", FutureCrash, "\n",
|
||||
"Prior range for r =", format(start_r[1],digits=2), "-", format(start_r[2],digits=2),
|
||||
", prior range for k =", start_k[1], "-", start_k[2],"\n",
|
||||
file=outfile.txt,append=T)
|
||||
|
||||
if(filename_1=="SimCatch.csv") {
|
||||
cat(" True r =", r.sim, "(because input data were simulated with Schaefer model)\n",
|
||||
"True k = 1000, true MSY =", 1000*r.sim/4,"\n",
|
||||
"True biomass in last year =",bio[length(bio)],"or",bio[length(bio)]/1000,"k \n",
|
||||
"True mean catch / MSY ratio =", mean(ct)/(1000*r.sim/4),"\n",
|
||||
file=outfile.txt,append=T)
|
||||
}
|
||||
if(Btype == "observed"|Btype=="simulated") {
|
||||
cat(" Results from Bayesian Schaefer model using catch & biomass \n",
|
||||
"r =", gm.r.jags,", 95% CL =", lcl.r.jags, "-", ucl.r.jags,"\n",
|
||||
"k =", gm.k.jags,", 95% CL =", lcl.k.jags, "-", ucl.k.jags,"\n",
|
||||
"MSY =", gm.MSY.jags,", 95% CL =", lcl.MSY.jags, "-", ucl.MSY.jags,"\n",
|
||||
"Mean catch / MSY =", mean(ct)/gm.MSY.jags,"\n",
|
||||
file=outfile.txt,append=T)
|
||||
}
|
||||
cat(" Results of CMSY analysis with altogether", nviablepoints,"unique viable r-k pairs \n",
|
||||
nviablepoints-length(rem.log.r),"r-k pairs above the initial geometric mean of r =", gm.rv, "were analysed\n",
|
||||
"r =", r.est,", 95% CL =", lcl.r.est, "-", ucl.r.est,"\n",
|
||||
"k =", k.est,", 95% CL =", lcl.k.est, "-", ucl.k.est,"\n",
|
||||
"MSY =", MSY.est,", 95% CL =", lcl.MSY.est, "-", ucl.MSY.est,"\n",
|
||||
"Predicted biomass last year b/k =", lastyr.bio, "2.5th perc b/k =", lcl.lastyr.bio,
|
||||
"97.5th perc b/k =", ucl.lastyr.bio,"\n",
|
||||
"Precautionary 25th percentile b/k =",q.btv[yr==EndYear],"\n",
|
||||
"----------------------------------------------------------\n",
|
||||
file=outfile.txt,append=T)
|
||||
|
||||
}
|
||||
|
||||
} # end of stocks loop
|
|
@@ -0,0 +1,119 @@
|
|||
##--------------------------------------------------------
|
||||
## CMSY analysis with estimation of total biomass, including Bayesian Schaefer
|
||||
## written by Rainer Froese with support from Gianpaolo Coro in 2013-2014
|
||||
## This version adjusts biomass to average biomass over the year
|
||||
## It also contains the FutureCrash option to improve prediction of final biomass
|
||||
## Version 21 adds the purple point to indicate the 25th percentile of final biomass
|
||||
## Version 22 accepts that no biomass or CPUE data are available
|
||||
##--------------------------------------------------------
|
||||
library(R2jags) # Interface with JAGS
|
||||
library(coda)
|
||||
|
||||
#-----------------------------------------
|
||||
# Some general settings
|
||||
#-----------------------------------------
|
||||
# set.seed(999) # use for comparing results between runs
|
||||
rm(list=ls(all=TRUE)) # clear previous variables etc
|
||||
options(digits=3) # displays all numbers with three significant digits as default
|
||||
graphics.off() # close graphics windows from previous sessions
|
||||
|
||||
#-----------------------------------------
|
||||
# General settings for the analysis
|
||||
#-----------------------------------------
|
||||
sigR <- 0.02 # overall process error; 0.05 works reasonable for simulations, 0.02 for real data; 0 if deterministic model
|
||||
n <- 10000 # initial number of r-k pairs
|
||||
batch.mode <- T # set to TRUE to suppress graphs
|
||||
write.output <- T # set to true if table of output is wanted
|
||||
FutureCrash <- "No"
|
||||
|
||||
#-----------------------------------------
|
||||
# Start output to screen
|
||||
#-----------------------------------------
|
||||
cat("-------------------------------------------\n")
|
||||
cat("Catch-MSY Analysis,", date(),"\n")
|
||||
cat("-------------------------------------------\n")
|
||||
|
||||
#------------------------------------------
|
||||
# Read data and assign to vectors
|
||||
#------------------------------------------
|
||||
# filename_1 <- "AllStocks_Catch4.csv"
|
||||
# filename_2 <- "AllStocks_ID4.csv"
|
||||
# filename_1 <- "SimCatch.csv"
|
||||
# filename_2 <- "SimSpec.csv"
|
||||
# filename_2 <- "SimSpecWrongS.csv"
|
||||
# filename_2 <- "SimSpecWrongI.csv"
|
||||
# filename_2 <- "SimSpecWrongF.csv"
|
||||
# filename_2 <- "SimSpecWrongH.csv"
|
||||
# filename_2 <- "SimSpecWrongL.csv"
|
||||
# filename_1 <- "FishDataLim.csv"
|
||||
# filename_2 <- "FishDataLimSpec.csv"
|
||||
filename_1 <- "WKLIFE4Stocks.csv"
|
||||
filename_2 <- "WKLIFE4ID.csv"
|
||||
|
||||
outfile<-"outfile"
|
||||
outfile.txt <- "outputfile.txt"
|
||||
|
||||
cdat <- read.csv(filename_1, header=T, dec=".", stringsAsFactors = FALSE)
|
||||
cinfo <- read.csv(filename_2, header=T, dec=".", stringsAsFactors = FALSE)
|
||||
cat("Files", filename_1, ",", filename_2, "read successfully","\n")
|
||||
|
||||
# Stocks with total biomass data and catch data from StartYear to EndYear
|
||||
# stocks <- sort(as.character(cinfo$stock)) # All stocks
|
||||
stocks<-"HLH_M07"
|
||||
|
||||
# select one stock after the other
|
||||
for(stock in stocks) {
|
||||
# assign data from cinfo to vectors
|
||||
res <- as.character(cinfo$Resilience[cinfo$stock==stock])
|
||||
StartYear <- as.numeric(cinfo$StartYear[cinfo$stock==stock])
|
||||
EndYear <- as.numeric(cinfo$EndYear[cinfo$stock==stock])
|
||||
r_low <- as.numeric(cinfo$r_low[cinfo$stock==stock])
|
||||
r_hi <- as.numeric(cinfo$r_hi[cinfo$stock==stock])
|
||||
stb_low <- as.numeric(cinfo$stb_low[cinfo$stock==stock])
|
||||
stb_hi <- as.numeric(cinfo$stb_hi[cinfo$stock==stock])
|
||||
intyr <- as.numeric(cinfo$intyr[cinfo$stock==stock])
|
||||
intbio_low <- as.numeric(cinfo$intbio_low[cinfo$stock==stock])
|
||||
intbio_hi <- as.numeric(cinfo$intbio_hi[cinfo$stock==stock])
|
||||
endbio_low <- as.numeric(cinfo$endbio_low[cinfo$stock==stock])
|
||||
endbio_hi <- as.numeric(cinfo$endbio_hi[cinfo$stock==stock])
|
||||
Btype <- as.character(cinfo$Btype[cinfo$stock==stock])
|
||||
FutureCrash <- as.character(cinfo$FutureCrash[cinfo$stock==stock])
|
||||
comment <- as.character(cinfo$comment[cinfo$stock==stock])
|
||||
|
||||
|
||||
# extract data on stock
|
||||
yr <- as.numeric(cdat$yr[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])
|
||||
ct <- as.numeric(cdat$ct[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that catch is given in tonnes, transforms to '000 tonnes
|
||||
if(Btype=="observed" | Btype=="CPUE" | Btype=="simulated") {
|
||||
bt <- as.numeric(cdat$TB[cdat$stock==stock & cdat$yr >= StartYear & cdat$yr <= EndYear])/1000 ## assumes that biomass is in tonnes, transforms to '000 tonnes
|
||||
} else {bt <- NA}
|
||||
nyr <- length(yr) # number of years in the time series
|
||||
|
||||
cat("->---------------------------------------
|
||||
Species: NA
|
||||
Name and region: NA , NA
|
||||
Stock: HLH_M07
|
||||
Catch data used from years 1 - 50
|
||||
Prior initial relative biomass = 0.5 - 0.9
|
||||
Prior intermediate rel. biomass= 0.01 - 0.4 in year 25
|
||||
Prior final relative biomass = 0.4 - 0.8
|
||||
If current catches continue, is the stock likely to crash within 3 years? No
|
||||
Prior range for r = 0.2 - 0.8 , prior range for k = 125 - 9965
|
||||
Results from Bayesian Schaefer model using catch & biomass ( simulated )
|
||||
MSY = 91.7 , 95% CL = 83.9 - 100
|
||||
Mean catch / MSY = 0.882
|
||||
r = 0.425 , 95% CL = 0.374 - 0.483
|
||||
k = 863 , 95% CL = 783 - 951
|
||||
Results of CMSY analysis
|
||||
Altogether 2055 unique viable r-k pairs were found
|
||||
1142 r-k pairs above the initial geometric mean of r = 0.343 were analysed
|
||||
r = 0.522 , 95% CL = 0.349 - 0.782
|
||||
k = 683 , 95% CL = 438 - 1067
|
||||
MSY = 89.2 , 95% CL = 82.2 - 96.7
|
||||
Predicted biomass in last year = 0.676 2.5th perc = 0.435 97.5th perc = 0.768
|
||||
Predicted biomass in next year = 0.673 2.5th perc = 0.433 97.5th perc = 0.758
|
||||
----------------------------------------------------------
|
||||
",file=outfile.txt,append=T)
|
||||
|
||||
}
|
||||
|
|
@@ -0,0 +1,17 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
  <session-factory>
    <property name="connection.driver_class">org.postgresql.Driver</property>
    <property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
    <property name="connection.url">jdbc:postgresql://localhost/testdb</property>
    <property name="connection.username">gcube</property>
    <property name="connection.password">d4science2</property>
    <property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
    <property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
    <property name="c3p0.timeout">0</property>
    <property name="c3p0.max_size">1</property>
    <property name="c3p0.max_statements">0</property>
    <property name="c3p0.min_size">1</property>
    <property name="current_session_context_class">thread</property>
  </session-factory>
</hibernate-configuration>
Binary file not shown.
|
@@ -0,0 +1,530 @@
|
|||
#### R and JAGS code for estimating LWR-parameters from previous studies
|
||||
#### Meant for updating the ESTIMATE table in FishBase
|
||||
#### Created by Rainer Froese in March 2013, including JAGS models by James Thorston
|
||||
#### Modified in June 2013 to include subfamilies
|
||||
|
||||
rm(list=ls(all=TRUE)) # remove previous variables and data
|
||||
options(digits=3) # 3 significant digits as default
|
||||
library(R2jags) # Interface with JAGS
|
||||
runif(1) # draws one random number to initialise the random number generator
|
||||
|
||||
#### Read in data
|
||||
DataFile = "RF_LWR2.csv" # RF_LWR4 was extracted from FishBase in June 2013
|
||||
Data = read.csv(DataFile, header=TRUE)
|
||||
cat("Start", date(), "\n")
|
||||
cat("Data file =", DataFile, "\n")
|
||||
# Get unique, sorted list of Families
|
||||
Fam.All <- sort(unique(as.character(Data$Family)))
|
||||
Families <- Fam.All[Fam.All== "Acanthuridae" | Fam.All == "Achiridae"]
|
||||
|
||||
OutFile = "LWR_Test1.csv"
|
||||
JAGSFILE = "dmnorm_0.bug"
|
||||
|
||||
# Get unique, sorted list of body shapes
|
||||
Bshape <- sort(unique(as.character(Data$BodyShapeI)))
|
||||
|
||||
#------------------------------------------
|
||||
# Functions
|
||||
#------------------------------------------
|
||||
|
||||
#---------------------------------------------------------
|
||||
# Function to get the priors for the respective body shape
|
||||
#---------------------------------------------------------
|
||||
Get.BS.pr <- function(BS) {
|
||||
### Assignment of priors based on available body shape information
|
||||
# priors derived from 5150 LWR studies in FishBase 02/2013
|
||||
|
||||
if (BS == "eel-like") { # eel-like prior for log(a) and b
|
||||
prior_mean_log10a = -2.99
|
||||
prior_sd_log10a = 0.175
|
||||
prior_tau_log10a = 1/prior_sd_log10a^2
|
||||
prior_mean_b = 3.06
|
||||
prior_sd_b = 0.0896
|
||||
prior_tau_b = 1/prior_sd_b^2
|
||||
} else
|
||||
if (BS == "elongated") { # elongate prior for log(a) and b
|
||||
prior_mean_log10a = -2.41
|
||||
prior_sd_log10a = 0.171
|
||||
prior_tau_log10a = 1/prior_sd_log10a^2
|
||||
prior_mean_b = 3.12
|
||||
prior_sd_b = 0.09
|
||||
prior_tau_b = 1/prior_sd_b^2
|
||||
} else
|
||||
if (BS == "fusiform / normal") { # fusiform / normal prior for log(a) and b
|
||||
prior_mean_log10a = -1.95
|
||||
prior_sd_log10a = 0.173
|
||||
prior_tau_log10a = 1/prior_sd_log10a^2
|
||||
prior_mean_b = 3.04
|
||||
prior_sd_b = 0.0857
|
||||
prior_tau_b = 1/prior_sd_b^2
|
||||
} else
|
||||
if (BS == "short and / or deep") { # short and / or deep prior for log(a) and b
|
||||
prior_mean_log10a = -1.7
|
||||
prior_sd_log10a = 0.175
|
||||
prior_tau_log10a = 1/prior_sd_log10a^2
|
||||
prior_mean_b = 3.01
|
||||
prior_sd_b = 0.0905
|
||||
prior_tau_b = 1/prior_sd_b^2
|
||||
} else
|
||||
# priors across all shapes, used for missing or other BS
|
||||
{
|
||||
prior_mean_log10a = -2.0
|
||||
prior_sd_log10a = 0.313
|
||||
prior_tau_log10a = 1/prior_sd_log10a^2
|
||||
prior_mean_b = 3.04
|
||||
prior_sd_b = 0.119
|
||||
prior_tau_b = 1/prior_sd_b^2
|
||||
}
|
||||
|
||||
# Priors for measurement error (= sigma) based on 5150 studies
|
||||
# given here as shape and rate parameters of a gamma distribution
|
||||
SD_rObs_log10a = 6520
|
||||
SD_muObs_log10a = 25076
|
||||
SD_rObs_b = 6808
|
||||
SD_muObs_b = 37001
|
||||
# Priors for between species variability (= sigma) based on 5150 studies for 1821 species
|
||||
SD_rGS_log10a = 1372
|
||||
SD_muGS_log10a = 7933
|
||||
SD_rGS_b = 572
|
||||
SD_muGS_b = 6498
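# Orientation only (added note, assuming JAGS's dgamma(shape, rate) parameterisation
# as used below): these pairs imply prior means (shape/rate) of roughly
# 6520/25076 = 0.26 and 6808/37001 = 0.18 for the observation-error SDs, and
# 1372/7933 = 0.17 and 572/6498 = 0.09 for the between-species SDs.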
|
||||
|
||||
prior.list <- list(mean_log10a=prior_mean_log10a, sd_log10a=prior_sd_log10a,
|
||||
tau_log10a=prior_tau_log10a, mean_b=prior_mean_b, sd_b=prior_sd_b,
|
||||
tau_b=prior_tau_b, SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
|
||||
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b, SD_rGS_log10a=SD_rGS_log10a,
|
||||
SD_muGS_log10a=SD_muGS_log10a, SD_rGS_b=SD_rGS_b, SD_muGS_b=SD_muGS_b)
|
||||
return(prior.list)
|
||||
}
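# Example use of the function above (illustrative only):
# pr <- Get.BS.pr("eel-like")
# pr$mean_b       # 3.06, prior mean of b for eel-like fishes
# pr$tau_log10a   # prior precision of log10(a), i.e. 1/0.175^2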
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Function to do a Bayesian analysis including LWR from relatives
|
||||
#--------------------------------------------------------------------
|
||||
SpecRelLWR <- function(a, b, wts, GenusSpecies, Nspecies, prior_mean_b, prior_tau_b,
|
||||
prior_mean_log10a, prior_tau_log10a, SD_rObs_log10a, SD_muObs_log10a,
|
||||
SD_rObs_b, SD_muObs_b, SD_rGS_log10a, SD_muGS_log10a,
|
||||
SD_rGS_b, SD_muGS_b){
|
||||
### Define JAGS model
|
||||
Model = "
|
||||
model {
|
||||
#### Process model -- effects of taxonomy
|
||||
# given the likelihood distributions and the priors,
|
||||
# create normal posterior distributions for log10a, b,
|
||||
# and for the process error (=between species variability sigmaGS)
|
||||
|
||||
abTrue[1] ~ dnorm(prior_mean_log10a,prior_tau_log10a)
|
||||
abTrue[2] ~ dnorm(prior_mean_b,prior_tau_b)
|
||||
sigmaGSlog10a ~ dgamma( SD_rGS_log10a, SD_muGS_log10a)
|
||||
sigmaGSb ~ dgamma( SD_rGS_b, SD_muGS_b)
|
||||
|
||||
# given the posterior distributions and the process errors,
|
||||
# establish for every species the expected within-species
|
||||
# parameter distributions; no correlation roGS between species
|
||||
|
||||
roGS <- 0
|
||||
tauGenusSpecies[1] <- pow(sigmaGSlog10a,-2)
|
||||
tauGenusSpecies[2] <- pow(sigmaGSb,-2)
|
||||
for(k in 1:Nspecies){
|
||||
abGenusSpecies[k,1] ~ dnorm(abTrue[1],tauGenusSpecies[1])
|
||||
abGenusSpecies[k,2] ~ dnorm(abTrue[2],tauGenusSpecies[2])
|
||||
}
|
||||
|
||||
### Observation model
|
||||
## Errors
|
||||
# given the data and the priors, establish distributions
|
||||
# for the observation errors sigmaObs
|
||||
|
||||
sigmaObslog10a ~ dgamma( SD_rObs_log10a, SD_muObs_log10a)
|
||||
sigmaObsb ~ dgamma( SD_rObs_b, SD_muObs_b)
|
||||
|
||||
# create inverse covariance matrix, with negative parameter correlation roObs
|
||||
roObs ~ dunif(-0.99,0)
|
||||
CovObs[1,1] <- pow(sigmaObslog10a,2)
|
||||
CovObs[2,2] <- pow(sigmaObsb,2)
|
||||
CovObs[1,2] <- roObs * sigmaObslog10a * sigmaObsb
|
||||
CovObs[2,1] <- CovObs[1,2]
|
||||
TauObs[1:2,1:2] <- inverse(CovObs[1:2,1:2])
|
||||
|
||||
## likelihood
|
||||
# given the data, the priors and the covariance,
|
||||
# create multivariate likelihood distributions for log10(a) and b
|
||||
|
||||
for(i in 1:N){
|
||||
TauObsI[i,1:2,1:2] <- TauObs[1:2,1:2] * pow(Weights[i],2) # weighted precision
|
||||
ab[i,1:2] ~ dmnorm(abGenusSpecies[GenusSpecies[i],1:2],TauObsI[i,1:2,1:2])
|
||||
}
|
||||
}
|
||||
"
|
||||
|
||||
# Write JAGS model
|
||||
cat(Model, file=JAGSFILE)
|
||||
# JAGS settings
|
||||
Nchains = 3 # number of MCMC chains to be used in JAGS
|
||||
Nburnin = 1e4 # number of burn-in iterations, to be discarded; 1e4 = 10000 iterations for burn-in
|
||||
Niter = 3e4 # number of iterations after burn-in; 3e4 = 30000 iterations
|
||||
Nthin = 1e1 # subset of iterations to be used for analysis; 1e1 = every 10th iteration
|
||||
|
||||
# Run JAGS: define data to be passed on in DataJags;
|
||||
# determine parameters to be returned in Param2Save;
|
||||
# call JAGS with function Jags()
|
||||
DataJags = list(ab=cbind(log10(a),b), N=length(a), Weights=wts, Nspecies=Nspecies, GenusSpecies=GenusSpecies,
|
||||
prior_mean_b=prior_mean_b, prior_tau_b=prior_tau_b,
|
||||
prior_mean_log10a=prior_mean_log10a, prior_tau_log10a=prior_tau_log10a,
|
||||
SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
|
||||
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b,
|
||||
SD_rGS_log10a=SD_rGS_log10a, SD_muGS_log10a=SD_muGS_log10a,
|
||||
SD_rGS_b=SD_rGS_b, SD_muGS_b=SD_muGS_b)
|
||||
Params2Save = c("abTrue","abGenusSpecies","sigmaGSlog10a","sigmaGSb","sigmaObslog10a","sigmaObsb","roObs")
|
||||
Jags <- jags(inits=NULL, model.file=JAGSFILE, working.directory=NULL, data=DataJags,
|
||||
parameters.to.save=Params2Save, n.chains=Nchains, n.thin=Nthin, n.iter=Niter, n.burnin=Nburnin)
|
||||
Jags$BUGSoutput # contains the results from the JAGS run
|
||||
|
||||
# Analyze output for the relatives
|
||||
abTrue <- Jags$BUGSoutput$sims.list$abTrue
|
||||
R_mean_log10a <- mean(abTrue[,1]) # true mean of log10(a)
|
||||
R_sd_log10a <- sd(abTrue[,1]) # true SE of log10(a)
|
||||
R_mean_b <- mean(abTrue[,2]) # true mean of b
|
||||
R_sd_b <- sd(abTrue[,2]) # true SE of b
|
||||
|
||||
# Analyze output for the target species
|
||||
abGenusSpecies <- Jags$BUGSoutput$sims.list$abGenusSpecies
|
||||
mean_log10a <- mean(abGenusSpecies[,1,1]) # true mean of log10(a) for the first species= target species
|
||||
sd_log10a <- sd(abGenusSpecies[,1,1]) # true SE of log10(a)
|
||||
mean_b <- mean(abGenusSpecies[,1,2]) # true mean of b
|
||||
sd_b <- sd(abGenusSpecies[,1,2]) # true SE of b
|
||||
mean_sigma_log10a <- mean(Jags$BUGSoutput$sims.list$sigmaObslog10a) # measurement error of log10(a)
|
||||
sd_sigma_log10a <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigmaObslog10a), 2, sd)
|
||||
mean_sigma_b <- mean(Jags$BUGSoutput$sims.list$sigmaObsb) # measurement error of b
|
||||
sd_sigma_b <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigmaObsb), 2, sd)
|
||||
ro_ab <- mean(Jags$BUGSoutput$sims.list$roObs) # measurement correlation of log10(a),b
|
||||
|
||||
out.list <- list(N=length(a), mean_log10a=mean_log10a, sd_log10a=sd_log10a, mean_b=mean_b, sd_b=sd_b,
|
||||
R_mean_log10a=R_mean_log10a, R_sd_log10a=R_sd_log10a, R_mean_b=R_mean_b, R_sd_b=R_sd_b)
|
||||
return(out.list)
|
||||
}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Function to do a Bayesian LWR analysis with studies for target species only
|
||||
#-----------------------------------------------------------------------------
|
||||
SpecLWR <- function(a, b, wts, prior_mean_b, prior_tau_b,
|
||||
prior_mean_log10a, prior_tau_log10a, SD_rObs_log10a, SD_muObs_log10a,
|
||||
SD_rObs_b, SD_muObs_b, SD_rGS_log10a, SD_muGS_log10a,
|
||||
SD_rGS_b, SD_muGS_b){
|
||||
|
||||
# Define JAGS model
|
||||
Model = "
|
||||
model {
|
||||
sigma1 ~ dgamma( SD_rObs_log10a, SD_muObs_log10a) # posterior distribution for measurement error in log10a
|
||||
sigma2 ~ dgamma( SD_rObs_b, SD_muObs_b) # posterior distribution for measurement error in b
|
||||
|
||||
ro ~ dunif(-0.99,0) # uniform prior for negative correlation between log10a and b
|
||||
abTrue[1] ~ dnorm(prior_mean_log10a,prior_tau_log10a) # normal posterior distribution for log10a
|
||||
abTrue[2] ~ dnorm(prior_mean_b,prior_tau_b) # normal posterior distribution for b
|
||||
CovObs[1,1] <- pow(sigma1,2)
|
||||
CovObs[2,2] <- pow(sigma2,2)
|
||||
CovObs[1,2] <- ro * sigma1 * sigma2
|
||||
CovObs[2,1] <- CovObs[1,2]
|
||||
TauObs[1:2,1:2] <- inverse(CovObs[1:2,1:2]) # create inverse covariance matrix
|
||||
for(i in 1:N){
|
||||
TauObsI[i,1:2,1:2] <- TauObs[1:2,1:2] * pow(Weights[i],2) # converts prior SD into prior weighted precision
|
||||
|
||||
# given the data, the priors and the covariance, create multivariate normal posteriors for log(a) and b
|
||||
ab[i,1:2] ~ dmnorm(abTrue[1:2],TauObsI[i,1:2,1:2])
|
||||
}
|
||||
}
|
||||
"
|
||||
|
||||
# Write JAGS model
|
||||
cat(Model, file=JAGSFILE)
|
||||
# JAGS settings
|
||||
Nchains = 3 # number of MCMC chains to be used in JAGS
|
||||
Nburnin = 1e4 # number of burn-in runs, to be discarded; 10000 iterations for burn-in
|
||||
Niter = 3e4 # number of iterations after burn-in; 3e4 = 30000 iterations
|
||||
Nthin = 1e1 # subset of iterations to be used for analysis; 1e1 = every 10th iteration
|
||||
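# With these settings each chain keeps (Niter - Nburnin)/Nthin = (30000 - 10000)/10 = 2000 draws,
# i.e. 6000 posterior samples in total across the 3 chains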
# Run JAGS: define data to be passed on in DataJags; determine parameters to be returned in Param2Save; call JAGS with function Jags()
|
||||
DataJags = list(ab=cbind(log10(a),b), N=length(a), Weights=wts, prior_mean_b=prior_mean_b,
|
||||
prior_tau_b=prior_tau_b, prior_mean_log10a=prior_mean_log10a, prior_tau_log10a=prior_tau_log10a,
|
||||
SD_rObs_log10a=SD_rObs_log10a, SD_muObs_log10a=SD_muObs_log10a,
|
||||
SD_rObs_b=SD_rObs_b, SD_muObs_b=SD_muObs_b)
|
||||
Params2Save = c("abTrue","sigma1","sigma2","ro")
|
||||
Jags <- jags(inits=NULL, model.file=JAGSFILE, working.directory=NULL, data=DataJags, parameters.to.save=Params2Save, n.chains=Nchains, n.thin=Nthin, n.iter=Niter, n.burnin=Nburnin)
|
||||
Jags$BUGSoutput # contains the results from the JAGS run
|
||||
# Analyze output
|
||||
abTrue <- Jags$BUGSoutput$sims.list$abTrue
|
||||
mean_log10a <- mean(abTrue[,1]) # true mean of log10(a)
|
||||
sd_log10a <- sd(abTrue[,1]) # true SE of log10(a)
|
||||
mean_b <- mean(abTrue[,2]) # true mean of b
|
||||
sd_b <- sd(abTrue[,2]) # true SE of b
|
||||
mean_sigma_log10a <- mean(Jags$BUGSoutput$sims.list$sigma1) # measurement error of log10(a)
|
||||
sd_sigma_log10a <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigma1), 2, sd)
|
||||
mean_sigma_b <- mean(Jags$BUGSoutput$sims.list$sigma2) # measurement error of b
|
||||
sd_sigma_b <- apply(as.matrix(Jags$BUGSoutput$sims.list$sigma2), 2, sd)
|
||||
ro_ab <- mean(Jags$BUGSoutput$sims.list$ro) # measurement correlation of log10(a),b
|
||||
|
||||
out.list <- list(N=length(a), mean_log10a=mean_log10a, sd_log10a=sd_log10a, mean_b=mean_b, sd_b=sd_b)
|
||||
return(out.list)
|
||||
|
||||
} # End of Functions section
|
||||
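# Illustrative sketch (not part of the original analysis): how SpecLWR() might be called
# directly for a single species, assuming hypothetical a, b and score values and the priors
# returned by Get.BS.pr(); the body-shape name is an assumption. Wrapped in if(FALSE) so it
# never executes when this script is sourced.
if(FALSE){
  a.ex   <- c(0.0105, 0.0120, 0.0098)          # hypothetical LWR a estimates
  b.ex   <- c(3.02, 2.95, 3.10)                # hypothetical LWR b estimates
  wts.ex <- c(1, 0.7, 0.5)                     # hypothetical study scores
  pr     <- Get.BS.pr("fusiform / normal")     # body-shape prior (name is an assumption)
  post.ex <- SpecLWR(a.ex, b.ex, wts.ex,
                     prior_mean_b=pr$mean_b, prior_tau_b=pr$tau_b,
                     prior_mean_log10a=pr$mean_log10a, prior_tau_log10a=pr$tau_log10a,
                     SD_rObs_log10a=pr$SD_rObs_log10a, SD_muObs_log10a=pr$SD_muObs_log10a,
                     SD_rObs_b=pr$SD_rObs_b, SD_muObs_b=pr$SD_muObs_b,
                     SD_rGS_log10a=pr$SD_rGS_log10a, SD_muGS_log10a=pr$SD_muGS_log10a,
                     SD_rGS_b=pr$SD_rGS_b, SD_muGS_b=pr$SD_muGS_b)
}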
|
||||
#--------------------------------
|
||||
# Analysis by Family
|
||||
#--------------------------------
|
||||
# Do LWR analysis by Family, Subfamily and Body shape, depending on available LWR studies
|
||||
# for(Fam in "Acanthuridae") {
|
||||
for(Fam in Families) {
|
||||
Subfamilies <- sort(unique(Data$Subfamily[Data$Family==Fam]))
|
||||
for(SF in Subfamilies) {
|
||||
for(BS in Bshape) {
|
||||
# get species (SpecCodes) in this Subfamily and with this body shape
|
||||
SpecCode.SF.BS <- unique(Data$SpecCode[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS])
|
||||
# if there are species with this body shape
|
||||
if(length(SpecCode.SF.BS) > 0) {
|
||||
# get priors for this body shape
|
||||
prior <- Get.BS.pr(BS)
|
||||
# get LWR data for this body shape
|
||||
b_raw <- Data$b[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS]
|
||||
cat("\n")
|
||||
cat("Family =", Fam, ", Subfamily =", SF, ", Body shape =", BS, ", Species =", length(SpecCode.SF.BS), ", LWR =",
|
||||
length(b_raw[is.na(b_raw)==F]), "\n")
|
||||
# if no LWR studies exist for this body shape, assign the respective priors to all species
|
||||
if(length(b_raw[is.na(b_raw)==F])==0) {
|
||||
# assign priors to species with no LWR in this Subfamily with this body shape
|
||||
cat("Assigning overall body shape prior to", length(SpecCode.SF.BS), " species \n")
|
||||
for(SpC in SpecCode.SF.BS) {
|
||||
out.prior <- data.frame(Fam, SF, BS, SpC, 0, prior$mean_log10a, prior$sd_log10a, prior$mean_b, prior$sd_b,
|
||||
paste("all LWR estimates for this BS"))
|
||||
write.table(out.prior, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
|
||||
}
|
||||
} else {
|
||||
|
||||
# Update priors for this body shape using existing LWR studies
|
||||
# get LWR data for this Subfamily and body shape
|
||||
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0)
|
||||
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
|
||||
a <- Data$a[Keep]
|
||||
b <- Data$b[Keep]
|
||||
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
|
||||
# add a first dummy record with prior LWR and low score = 0.3, as pseudo target species
|
||||
# Name of dummy target species is Dum1 dum1
|
||||
TargetSpec = paste("Dum1", "dum1")
|
||||
wts <- c(0.3, wts)
|
||||
a <- c(10^(prior$mean_log10a), a)
|
||||
b <- c(prior$mean_b, b)
|
||||
GenSpec <- c(TargetSpec, GenSpec)
|
||||
# Relabel GenSpec so that TargetSpec = level 1
|
||||
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
|
||||
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
|
||||
Nspecies = nlevels(GenusSpecies) # number of species
|
||||
# run Bayesian analysis for pseudo target species with Subfamily members
|
||||
# The resulting R_mean_log10a, R_sd_log10a, R_mean_b, R_sd_b will be used for species without LWR
|
||||
cat("Updating Subfamily-Bodyshape prior using", Nspecies-1, "species with LWR studies \n")
|
||||
prior.SFam.BS <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies, prior_mean_b=prior$mean_b,
|
||||
prior_tau_b=prior$tau_b, prior_mean_log10a=prior$mean_log10a,
|
||||
prior_tau_log10a=prior$tau_log10a, SD_rObs_log10a=prior$SD_rObs_log10a,
|
||||
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
|
||||
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
|
||||
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
|
||||
SD_muGS_b=prior$SD_muGS_b)
|
||||
|
||||
#------------------------------------------------------------------------------------------
|
||||
# if there are Genera with >= 5 species with LWR, update body shape priors for these Genera
|
||||
#------------------------------------------------------------------------------------------
|
||||
Genera <- unique(as.character(Data$Genus[Keep]))
|
||||
# create empty list of lists for storage of generic priors
|
||||
prior.Gen.BS <- rep(list(list()),length(Genera)) # create a list of empty lists
|
||||
names(prior.Gen.BS) <- Genera # name the list elements according to the Genera
|
||||
for(Genus in Genera){
|
||||
# check if Genus contains >= 5 species with LWR data
|
||||
if(length(unique(Data$SpecCode[Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F &
|
||||
Data$Score>0 & Data$Genus==Genus]))>=5) {
|
||||
# run Subfamily analysis with only data for this genus
|
||||
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0 &
|
||||
Data$Genus==Genus)
|
||||
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
|
||||
a <- Data$a[Keep]
|
||||
b <- Data$b[Keep]
|
||||
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
|
||||
# add a first dummy record with prior LWR and low score = 0.3, as pseudo target species
|
||||
# Name of dummy target species is Dum1 dum1
|
||||
TargetSpec = paste("Dum1", "dum1")
|
||||
wts <- c(0.3, wts)
|
||||
a <- c(10^(prior$mean_log10a), a)
|
||||
b <- c(prior$mean_b, b)
|
||||
GenSpec <- c(TargetSpec, GenSpec)
|
||||
# Relabel GenSpec so that TargetSpec = level 1
|
||||
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
|
||||
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
|
||||
Nspecies = nlevels(GenusSpecies) # number of species
|
||||
# run Bayesian analysis for pseudo target species with Genus members
|
||||
# R_mean_log10a, R_sd_log10a, R_mean_b, R_sd_b will be used for species without LWR
|
||||
cat("Updating prior for Genus =", Genus, ", with", Nspecies -1, "LWR Species \n")
|
||||
prior.Gen.BS[[Genus]] <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
|
||||
prior_mean_b=prior.SFam.BS$R_mean_b,
|
||||
prior_tau_b=1/prior.SFam.BS$R_sd_b^2,
|
||||
prior_mean_log10a=prior.SFam.BS$R_mean_log10a,
|
||||
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2,
|
||||
SD_rObs_log10a=prior$SD_rObs_log10a,
|
||||
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
|
||||
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
|
||||
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
|
||||
SD_muGS_b=prior$SD_muGS_b)
|
||||
}
|
||||
}
|
||||
# new Subfamily-BS priors have been generated
|
||||
# for some genera, new Genus-BS priors have been generated
|
||||
# ---------------------------------------------------------------------
|
||||
# Loop through all species in this Subfamily-BS; assign LWR as appropriate
|
||||
# ---------------------------------------------------------------------
|
||||
for(SpC in SpecCode.SF.BS) {
|
||||
Genus <- as.character(unique(Data$Genus[Data$SpecCode==SpC]))
|
||||
Species <- as.character(unique(Data$Species[Data$SpecCode==SpC]))
|
||||
TargetSpec = paste(Genus, Species)
|
||||
LWR <- length(Data$b[Data$SpecCode==SpC & is.na(Data$b)==F & Data$Score>0])
|
||||
LWRGenspec <- length(unique(Data$SpecCode[Data$BodyShapeI==BS & is.na(Data$b)==F &
|
||||
Data$Score>0 & Data$Genus==Genus]))
|
||||
LWRSFamspec <- length(unique(Data$SpecCode[Data$BodyShapeI==BS & is.na(Data$b)==F &
|
||||
Data$Score>0 & Data$Family==Fam & Data$Subfamily==SF]))
|
||||
#---------------------------------------------------------
|
||||
# >= 5 LWR in target species, run single species analysis
|
||||
if(LWR >= 5) {
|
||||
# Run analysis with data only for this species
|
||||
Keep <- which(Data$SpecCode==SpC & is.na(Data$b)==F & Data$Score>0)
|
||||
wts = Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
|
||||
a = Data$a[Keep]
|
||||
b = Data$b[Keep]
|
||||
|
||||
# determine priors to be used
|
||||
if(LWRGenspec >= 5) {
|
||||
prior_mean_b=prior.Gen.BS[[Genus]]$R_mean_b
|
||||
prior_tau_b=1/prior.Gen.BS[[Genus]]$R_sd_b^2
|
||||
prior_mean_log10a=prior.Gen.BS[[Genus]]$R_mean_log10a
|
||||
prior_tau_log10a=1/prior.Gen.BS[[Genus]]$R_sd_log10a^2
|
||||
} else
|
||||
if (LWRSFamspec > 0) {
|
||||
prior_mean_b=prior.SFam.BS$R_mean_b
|
||||
prior_tau_b=1/prior.SFam.BS$R_sd_b^2
|
||||
prior_mean_log10a=prior.SFam.BS$R_mean_log10a
|
||||
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2
|
||||
} else {
|
||||
prior_mean_b=prior$mean_b
|
||||
prior_tau_b=prior$tau_b
|
||||
prior_mean_log10a=prior$mean_log10a
|
||||
prior_tau_log10a=prior$tau_log10a
|
||||
}
|
||||
cat("Running single species analysis for", TargetSpec, "LWR =", LWR, ", LWR species in Genus=",LWRGenspec,"\n" )
|
||||
# call function for single species analysis
|
||||
post <- SpecLWR(a, b, wts, prior_mean_b=prior_mean_b,
|
||||
prior_tau_b=prior_tau_b, prior_mean_log10a=prior_mean_log10a,
|
||||
prior_tau_log10a=prior_tau_log10a, SD_rObs_log10a=prior$SD_rObs_log10a,
|
||||
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
|
||||
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
|
||||
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
|
||||
SD_muGS_b=prior$SD_muGS_b)
|
||||
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3), format(post$mean_b, digits=3), format(post$sd_b, digits=3),
|
||||
paste("LWR estimates for this species"))
|
||||
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
|
||||
|
||||
} else
|
||||
#--------------------------------------------------------
|
||||
# 1-4 LWR in target species and >= 5 LWR species in Genus
|
||||
# run hierarchical analysis for genus members, with Subfamily-BS prior
|
||||
if(LWR >= 1 & LWRGenspec >=5) {
|
||||
# run Subfamily analysis with only data for this genus
|
||||
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0 &
|
||||
Data$Genus==Genus)
|
||||
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
|
||||
a <- Data$a[Keep]
|
||||
b <- Data$b[Keep]
|
||||
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
|
||||
|
||||
# Relabel GenSpec so that TargetSpec = level 1
|
||||
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
|
||||
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
|
||||
Nspecies = nlevels(GenusSpecies) # number of species
|
||||
# run Bayesian analysis for target species with Genus members
|
||||
cat("Running analysis with congeners for", TargetSpec, ", LWR =", LWR,", LWR species in Genus =", LWRGenspec,"\n")
|
||||
post <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
|
||||
prior_mean_b=prior.SFam.BS$R_mean_b,
|
||||
prior_tau_b=1/prior.SFam.BS$R_sd_b^2,
|
||||
prior_mean_log10a=prior.SFam.BS$R_mean_log10a,
|
||||
prior_tau_log10a=1/prior.SFam.BS$R_sd_log10a^2,
|
||||
SD_rObs_log10a=prior$SD_rObs_log10a,
|
||||
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
|
||||
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
|
||||
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
|
||||
SD_muGS_b=prior$SD_muGS_b)
|
||||
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3), format(post$mean_b, digits=3), format(post$sd_b, digits=3),
|
||||
paste("LWR estimates for species & Genus-BS"))
|
||||
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
|
||||
} else
|
||||
|
||||
#-------------------------------------------------------
|
||||
# 1-4 LWR in target species and < 5 LWR species in Genus
|
||||
# run hierarchical analysis for Subfamily members, with bodyshape prior
|
||||
|
||||
if(LWR >= 1 & LWRSFamspec > 1) {
|
||||
# run Subfamily analysis
|
||||
Keep <- which(Data$Family==Fam & Data$Subfamily==SF & Data$BodyShapeI==BS & is.na(Data$b)==F & Data$Score>0)
|
||||
wts <- Data$Score[Keep] # Un-normalized weights (so that Cov is comparable among analyses)
|
||||
a <- Data$a[Keep]
|
||||
b <- Data$b[Keep]
|
||||
GenSpec <- paste(Data$Genus[Keep],Data$Species[Keep])
|
||||
# Relabel GenSpec so that TargetSpec = level 1
|
||||
OtherSpecies = unique(GenSpec[GenSpec != TargetSpec])
|
||||
GenusSpecies = factor(GenSpec, levels=c(TargetSpec, OtherSpecies))
|
||||
Nspecies = nlevels(GenusSpecies) # number of species
|
||||
# run Bayesian analysis for target species with Subfamily members
|
||||
cat("Running analysis with Subfamily members for", TargetSpec, ", LWR =", LWR,", LWR species in Subfamily-BS =",
|
||||
LWRSFamspec, "\n")
|
||||
post <- SpecRelLWR(a, b, wts, GenusSpecies, Nspecies,
|
||||
prior_mean_b=prior$mean_b,
|
||||
prior_tau_b=prior$tau_b,
|
||||
prior_mean_log10a=prior$mean_log10a,
|
||||
prior_tau_log10a=prior$tau_log10a,
|
||||
SD_rObs_log10a=prior$SD_rObs_log10a,
|
||||
SD_muObs_log10a=prior$SD_muObs_log10a, SD_rObs_b=prior$SD_rObs_b,
|
||||
SD_muObs_b=prior$SD_muObs_b, SD_rGS_log10a=prior$SD_rGS_log10a,
|
||||
SD_muGS_log10a=prior$SD_muGS_log10a, SD_rGS_b=prior$SD_rGS_b,
|
||||
SD_muGS_b=prior$SD_muGS_b)
|
||||
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(post$mean_log10a, digits=3), format(post$sd_log10a, digits=3),
|
||||
format(post$mean_b, digits=3), format(post$sd_b, digits=3),
|
||||
paste("LWR estimates for species & Subfamily-BS"))
|
||||
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
|
||||
} else
|
||||
#--------------------------------------------------
|
||||
# assign Genus-BS priors to target species
|
||||
if(LWRGenspec >= 5) {
|
||||
cat("Assign Genus-BS prior for", TargetSpec, "\n")
|
||||
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(prior.Gen.BS[[Genus]]$mean_log10a, digits=3),
|
||||
format(prior.Gen.BS[[Genus]]$sd_log10a, digits=3),
|
||||
format(prior.Gen.BS[[Genus]]$mean_b, digits=3), format(prior.Gen.BS[[Genus]]$sd_b, digits=3),
|
||||
paste("LWR estimates for this Genus-BS"))
|
||||
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
|
||||
} else {
|
||||
# -----------------------------------------------
|
||||
# assign Subfamily-BS priors to target species
|
||||
cat("Assign Subfamily-BS prior for", TargetSpec,"\n")
|
||||
out.SpC <- data.frame(Fam, SF, BS, SpC, LWR, format(prior.SFam.BS$mean_log10a, digits=3), format(prior.SFam.BS$sd_log10a, digits=3),
|
||||
format(prior.SFam.BS$mean_b, digits=3), format(prior.SFam.BS$sd_b, digits=3), paste("LWR estimates for this Subfamily-BS"))
|
||||
write.table(out.SpC, file=OutFile, append = T, sep=",", dec=".", row.names=F, col.names=F)
|
||||
}
|
||||
} # end of species loop for this Subfamily and body shape
|
||||
|
||||
} # end of section dealing with Subfamily - body shapes that contain LWR estimates
|
||||
|
||||
} # end of section that deals with Subfamily - body shapes that contain species
|
||||
|
||||
} # end of body shape section
|
||||
|
||||
} # end of Subfamily section
|
||||
|
||||
} # end of Family section
|
||||
cat("End", date(),"\n")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,17 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<jardesc>
|
||||
<jar path="EcologicalEngineExecutor/AQUAMAPS_SUITABLE/aquamapsnode.jar"/>
|
||||
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/aquamapsjarcreator.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
|
||||
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
|
||||
<selectedProjects/>
|
||||
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
|
||||
<sealing sealJar="false">
|
||||
<packagesToSeal/>
|
||||
<packagesToUnSeal/>
|
||||
</sealing>
|
||||
</manifest>
|
||||
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
|
||||
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java<org.gcube.dataanalysis.executor.generators"/>
|
||||
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java<org.gcube.dataanalysis.executor.nodes.algorithms"/>
|
||||
</selectedElements>
|
||||
</jardesc>
|
|
@ -0,0 +1,32 @@
|
|||
#### Use two appenders, one to log to console, another to log to a file
|
||||
log4j.rootCategory= R
|
||||
|
||||
#### First appender writes to console
|
||||
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.stdout.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
|
||||
#log4j.appender.stdout.layout.ConversionPattern=%m%n
|
||||
#log4j.appender.stdout.File=Analysis.log
|
||||
|
||||
#### Second appender writes to a file
|
||||
log4j.logger.AnalysisLogger=trace,stdout, R
|
||||
log4j.appender.R=org.apache.log4j.RollingFileAppender
|
||||
#log4j.appender.R=org.apache.log4j.AsyncAppender
|
||||
#log4j.appender.R.Threshold=INFO
|
||||
log4j.appender.R.File=Analysis.log
|
||||
log4j.appender.R.MaxFileSize=50000KB
|
||||
log4j.appender.R.MaxBackupIndex=2
|
||||
log4j.appender.R.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.R.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
|
||||
#log4j.appender.R.layout.ConversionPattern=%m%n
|
||||
|
||||
#### Third appender writes to a file
|
||||
log4j.logger.org.hibernate=H
|
||||
#log4j.appender.H=org.apache.log4j.RollingFileAppender
|
||||
log4j.appender.H=org.apache.log4j.AsyncAppender
|
||||
#log4j.appender.H.File=HibernateLog.log
|
||||
#log4j.appender.H.MaxFileSize=1024KB
|
||||
#log4j.appender.H.MaxBackupIndex=2
|
||||
log4j.appender.H.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.H.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<hibernate-configuration>
|
||||
<session-factory>
|
||||
<property name="connection.driver_class">org.postgresql.Driver</property>
|
||||
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
|
||||
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
|
||||
<property name="connection.username">gcube</property>
|
||||
<property name="connection.password">d4science2</property>
|
||||
<!-- <property name="dialect">org.hibernatespatial.postgis.PostgisDialect</property>-->
|
||||
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
|
||||
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
|
||||
<property name="c3p0.timeout">0</property>
|
||||
<property name="c3p0.max_size">10</property>
|
||||
<property name="c3p0.max_statements">0</property>
|
||||
<property name="c3p0.min_size">1</property>
|
||||
<property name="current_session_context_class">thread</property>
|
||||
</session-factory>
|
||||
</hibernate-configuration>
|
|
@ -0,0 +1,9 @@
|
|||
AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable
|
||||
AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative
|
||||
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative2050
|
||||
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable2050
|
||||
AQUAMAPS_NATIVE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNN
|
||||
AQUAMAPS_SUITABLE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNSuitable
|
||||
FEED_FORWARD_A_N_N_DISTRIBUTION=org.gcube.dataanalysis.ecoengine.spatialdistributions.FeedForwardNeuralNetworkDistribution
|
||||
LWR=org.gcube.dataanalysis.executor.nodes.algorithms.LWR
|
||||
CMSY=org.gcube.dataanalysis.executor.nodes.algorithms.CMSY
|
|
@ -0,0 +1,3 @@
|
|||
DBSCAN=org.gcube.dataanalysis.ecoengine.clustering.DBScan
|
||||
KMEANS=org.gcube.dataanalysis.ecoengine.clustering.KMeans
|
||||
XMEANS=org.gcube.dataanalysis.ecoengine.clustering.XMeansWrapper
|
|
@ -0,0 +1,3 @@
|
|||
DISCREPANCY_ANALYSIS=org.gcube.dataanalysis.ecoengine.evaluation.DiscrepancyAnalysis
|
||||
QUALITY_ANALYSIS=org.gcube.dataanalysis.ecoengine.evaluation.DistributionQualityAnalysis
|
||||
HRS=org.gcube.dataanalysis.ecoengine.evaluation.HabitatRepresentativeness
|
|
@ -0,0 +1,6 @@
|
|||
LOCAL_WITH_DATABASE=org.gcube.dataanalysis.ecoengine.processing.LocalSplitGenerator
|
||||
SIMPLE_LOCAL=org.gcube.dataanalysis.ecoengine.processing.LocalSimpleSplitGenerator
|
||||
D4SCIENCE=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
|
||||
#OCCURRENCES_MERGER=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
|
||||
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
|
||||
#OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing
|
|
@ -0,0 +1,439 @@
|
|||
cat("Retrieving Input Parameters\n")
|
||||
inputFile<-'tacsat.csv'
|
||||
outputFile<-'tacsat_interpolated.csv'
|
||||
|
||||
require(data.table)
|
||||
print(Sys.time())
|
||||
|
||||
memory.size(max = TRUE)
|
||||
memory.limit(size = 4000)
|
||||
|
||||
interCubicHermiteSpline <- function(spltx,spltCon,res,params,headingAdjustment){
|
||||
|
||||
#Formula of Cubic Hermite Spline
|
||||
t <- seq(0,1,length.out=res)
|
||||
F00 <- 2*t^3 -3*t^2 + 1
|
||||
F10 <- t^3-2*t^2+t
|
||||
F01 <- -2*t^3+3*t^2
|
||||
F11 <- t^3-t^2
|
||||
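#Basis property: at t=0 only F00 equals 1 and at t=1 only F01 equals 1, so the spline passes
#through both end positions; F10 and F11 weight the (speed-scaled) headings at the two ends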
|
||||
#Making tacsat dataset ready
|
||||
spltx[spltCon[,1],"SI_HE"][which(is.na(spltx[spltCon[,1],"SI_HE"]))] <- 0
|
||||
spltx[spltCon[,2],"SI_HE"][which(is.na(spltx[spltCon[,2],"SI_HE"]))] <- 0
|
||||
|
||||
#Heading at begin point in degrees
|
||||
Hx0 <- sin(spltx[spltCon[,1],"SI_HE"]/(180/pi))
|
||||
Hy0 <- cos(spltx[spltCon[,1],"SI_HE"]/(180/pi))
|
||||
|
||||
#Heading at end point in degrees
|
||||
Hx1 <- sin(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi))
|
||||
Hy1 <- cos(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi))
|
||||
|
||||
#Start and end positions
|
||||
Mx0 <- spltx[spltCon[,1],"SI_LONG"]
|
||||
Mx1 <- spltx[spltCon[,2],"SI_LONG"]
|
||||
My0 <- spltx[spltCon[,1],"SI_LATI"]
|
||||
My1 <- spltx[spltCon[,2],"SI_LATI"]
|
||||
|
||||
#Corrected for longitude-latitude effect
|
||||
Hx0 <- Hx0 * params$fm * spltx[spltCon[,1],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
|
||||
Hx1 <- Hx1 * params$fm * spltx[spltCon[,2],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
|
||||
Hy0 <- Hy0 * params$fm * lonLatRatio(spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,1],"SI_LATI"]) * spltx[spltCon[,1],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
|
||||
Hy1 <- Hy1 * params$fm * lonLatRatio(spltx[spltCon[,2],"SI_LONG"],spltx[spltCon[,2],"SI_LATI"]) * spltx[spltCon[,2],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
|
||||
|
||||
#Get the interpolation
|
||||
fx <- outer(F00,Mx0,"*")+outer(F10,Hx0,"*")+outer(F01,Mx1,"*")+outer(F11,Hx1,"*")
|
||||
fy <- outer(F00,My0,"*")+outer(F10,Hy0,"*")+outer(F01,My1,"*")+outer(F11,Hy1,"*")
|
||||
|
||||
#Create output format
|
||||
intsx <- lapply(as.list(1:nrow(spltCon)),function(x){
|
||||
matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2,
|
||||
dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))})
|
||||
return(intsx)}
|
||||
|
||||
rbindTacsat <- function(set1,set2){
|
||||
cln1 <- colnames(set1)
|
||||
cln2 <- colnames(set2)
|
||||
if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets")
|
||||
idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE)
|
||||
idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE)
|
||||
|
||||
if(length(idx1)>0){
|
||||
for(i in idx1) set2 <- cbind(set2,NA)
|
||||
colnames(set2) <- c(cln2,cln1[idx1])}
|
||||
if(length(idx2)>0){
|
||||
for(i in idx2) set1 <- cbind(set1,NA)
|
||||
colnames(set1) <- c(cln1,cln2[idx2])}
|
||||
cln1 <- colnames(set1)
|
||||
cln2 <- colnames(set2)
|
||||
mtch <- pmatch(cln1,cln2)
|
||||
if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names")
|
||||
set3 <- rbind(set1,set2[,cln2[mtch]])
|
||||
return(set3)}
|
||||
|
||||
bearing <- function(lon,lat,lonRef,latRef){
|
||||
|
||||
x1 <- lon
|
||||
y1 <- lat
|
||||
x2 <- lonRef
|
||||
y2 <- latRef
|
||||
|
||||
y <- sin((x2-x1)*pi/180) * cos(y2*pi/180)
|
||||
x <- cos(y1*pi/180) * sin(y2*pi/180) - sin(y1*pi/180) * cos(y2*pi/180) * cos((x2-x1)*pi/180)
|
||||
bearing <- atan2(y,x)*180/pi
|
||||
bearing <- (bearing + 360)%%360
|
||||
return(bearing)}
|
||||
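#e.g. bearing(0,0,1,0) returns 90 (due east) and bearing(0,0,0,1) returns 0 (due north)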
|
||||
`distance` <-
|
||||
function(lon,lat,lonRef,latRef){
|
||||
|
||||
pd <- pi/180
|
||||
|
||||
a1<- sin(((latRef-lat)*pd)/2)
|
||||
a2<- cos(lat*pd)
|
||||
a3<- cos(latRef*pd)
|
||||
a4<- sin(((lonRef-lon)*pd)/2)
|
||||
a <- a1*a1+a2*a3*a4*a4
|
||||
|
||||
c <- 2*atan2(sqrt(a),sqrt(1-a));
|
||||
return(6371*c)}
|
||||
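#e.g. distance(0,0,0,1) is ~111.2 km, one degree of latitude along a meridian (Earth radius 6371 km)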
|
||||
distanceInterpolation <- function(interpolation){
|
||||
|
||||
res <- unlist(lapply(interpolation,function(x){
|
||||
dims <- dim(x)
|
||||
res <- distance(x[3:dims[1],1],x[3:dims[1],2],x[2:(dims[1]-1),1],x[2:(dims[1]-1),2])
|
||||
return(sum(res,na.rm=TRUE))}))
|
||||
|
||||
return(res)}
|
||||
|
||||
|
||||
equalDistance <- function(interpolation,res=10){
|
||||
|
||||
#Calculate distance of all interpolations at the same time
|
||||
totDist <- distanceInterpolation(interpolation)
|
||||
#Get dimensions of interpolations
|
||||
lngInt <- lapply(interpolation,dim)
|
||||
|
||||
#Warn if resolution of equal distance is too high compared to original resolution of interpolation
|
||||
if(min(unlist(lngInt)[seq(1,length(totDist),2)],na.rm=TRUE) < 9*res) warning("Number of intermediate points in the interpolation might be too small for the equal distance points chosen")
|
||||
|
||||
#Get distance steps to get equal distance
|
||||
eqStep <- totDist/(res-1)
|
||||
|
||||
#Get x-y values of all interpolations
|
||||
intidx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,byrow=TRUE)
|
||||
|
||||
#Do the calculation
|
||||
result <- lapply(interpolation,function(ind){
|
||||
i <- which(intidx[,1] == ind[1,1] & intidx[,2] == ind[1,2])
|
||||
idx <- apply(abs(outer(
|
||||
cumsum(distance(ind[3:lngInt[[i]][1],1],ind[3:lngInt[[i]][1],2],ind[2:(lngInt[[i]][1]-1),1],ind[2:(lngInt[[i]][1]-1),2])),
|
||||
seq(eqStep[i],totDist[i],eqStep[i]),
|
||||
"-")),
|
||||
2,which.min)+1
|
||||
idx <- c(1,idx)
|
||||
return(ind[c(1,idx+1),])})
|
||||
#Return the equal distance interpolated set in the same format as the interpolated dataset (as a list)
|
||||
return(result)}
|
||||
|
||||
interStraightLine <- function(spltx,spltCon,res){
|
||||
|
||||
fx <- mapply(seq,spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,2],"SI_LONG"],length.out=res)
|
||||
fy <- mapply(seq,spltx[spltCon[,1],"SI_LATI"],spltx[spltCon[,2],"SI_LATI"],length.out=res)
|
||||
|
||||
#Create output format
|
||||
intsx <- lapply(as.list(1:nrow(spltCon)),function(x){
|
||||
matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2,
|
||||
dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))})
|
||||
return(intsx)}
|
||||
|
||||
interpolation2Tacsat <- function(interpolation,tacsat,npoints=10,equalDist=TRUE){
|
||||
|
||||
# This function takes the list of tracks output by interpolateTacsat and converts them back to tacsat format.
|
||||
# The npoints argument is the optional number of points between each 'real' position.
|
||||
tacsat <- sortTacsat(tacsat)
|
||||
if(!"HL_ID" %in% colnames(tacsat)) tacsat$HL_ID <- 1:nrow(tacsat)
|
||||
if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
|
||||
if(equalDist){
|
||||
interpolationEQ <- equalDistance(interpolation,npoints) #Divide points equally along interpolated track (default is 10).
|
||||
} else {
|
||||
interpolationEQ <- lapply(interpolation,function(x){idx <- round(seq(2,nrow(x),length.out=npoints)); return(x[c(1,idx),])})
|
||||
}
|
||||
res <- lapply(interpolationEQ,function(x){
|
||||
idx <- unlist(x[1,1:2]@.Data); x <- data.frame(x)
|
||||
colnames(x) <- c("SI_LONG","SI_LATI")
|
||||
cls <- which(apply(tacsat[c(idx),],2,function(y){return(length(unique(y)))})==1)
|
||||
for(i in cls){
|
||||
x <- cbind(x,rep(tacsat[idx[1],i],nrow(x)));
|
||||
colnames(x) <- c(colnames(x)[1:(ncol(x)-1)],colnames(tacsat)[i])
|
||||
}
|
||||
if(!"VE_COU" %in% colnames(x)) x$VE_COU <- rep(tacsat$VE_COU[idx[1]],nrow(x))
|
||||
if(!"VE_REF" %in% colnames(x)) x$VE_REF <- rep(tacsat$VE_REF[idx[1]],nrow(x))
|
||||
if(!"FT_REF" %in% colnames(x)) x$FT_REF <- rep(tacsat$FT_REF[idx[1]],nrow(x))
|
||||
x$SI_DATIM <- tacsat$SI_DATIM[idx[1]]
|
||||
x$SI_DATIM[-c(1:2)] <- as.POSIXct(cumsum(rep(difftime(tacsat$SI_DATIM[idx[2]],tacsat$SI_DATIM[idx[1]],units="secs")/(nrow(x)-2),nrow(x)-2))+tacsat$SI_DATIM[idx[1]],tz="GMT",format = "%d/%m/%Y %H:%M")
|
||||
x$SI_DATE <- format(x$SI_DATIM,format="%d/%m/%Y")
|
||||
timeNotation <- ifelse(length(unlist(strsplit(tacsat$SI_TIME[1],":")))>2,"secs","mins")
|
||||
if(timeNotation == "secs") x$SI_TIME <- format(x$SI_DATIM,format="%H:%M:%S")
|
||||
if(timeNotation == "mins") x$SI_TIME <- format(x$SI_DATIM,format="%H:%M")
|
||||
x$SI_SP <- mean(c(tacsat$SI_SP[idx[1]],tacsat$SI_SP[idx[2]]),na.rm=TRUE)
|
||||
x$SI_HE <- NA;
|
||||
x$SI_HE[-c(1,nrow(x))] <- bearing(x$SI_LONG[2:(nrow(x)-1)],x$SI_LATI[2:(nrow(x)-1)],x$SI_LONG[3:nrow(x)],x$SI_LATI[3:nrow(x)])
|
||||
x$HL_ID <- tacsat$HL_ID[idx[1]]
|
||||
return(x[-c(1,2,nrow(x)),])})
|
||||
|
||||
#interpolationTot <- do.call(rbind,res)
|
||||
interpolationTot <- res[[1]][,which(duplicated(colnames(res[[1]]))==FALSE)]
|
||||
if(length(res)>1){
|
||||
for(i in 2:length(res)){
|
||||
if(nrow(res[[i]])>0)
|
||||
interpolationTot <- rbindTacsat(interpolationTot,res[[i]][,which(duplicated(colnames(res[[i]]))==FALSE)])
|
||||
}
|
||||
}
|
||||
#tacsatInt <- rbind(interpolationTot,tacsat[,colnames(interpolationTot)])
|
||||
tacsatInt <- rbindTacsat(tacsat,interpolationTot)
|
||||
tacsatInt <- sortTacsat(tacsatInt)
|
||||
|
||||
return(tacsatInt)
|
||||
|
||||
}
|
||||
|
||||
`sortTacsat` <-
|
||||
function(dat){
|
||||
require(doBy)
|
||||
|
||||
if(!"SI_DATIM" %in% colnames(dat)) dat$SI_DATIM <- as.POSIXct(paste(dat$SI_DATE, dat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
|
||||
|
||||
#Sort the tacsat data first by ship, then by date
|
||||
if("VE_REF" %in% colnames(dat)) dat <- orderBy(~VE_REF+SI_DATIM,data=dat)
|
||||
if("OB_REF" %in% colnames(dat)) dat <- orderBy(~OB_REF+SI_DATIM,data=dat)
|
||||
|
||||
return(dat)}
|
||||
|
||||
`lonLatRatio` <-
|
||||
function(x1,lat){
|
||||
#Based on the Haversine formula
|
||||
#At this position the latitude is held fixed, hence cos(lat)*cos(lat) instead of cos(lat) * cos(y2)
|
||||
a <- cos(lat*pi/180)*cos(lat*pi/180)*sin((0.1*pi/180)/2)*sin((0.1*pi/180)/2);
|
||||
c <- 2*atan2(sqrt(a),sqrt(1-a));
|
||||
R <- 6371;
|
||||
dx1 <- R*c
|
||||
|
||||
return(c(dx1/11.12))}
|
||||
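#For small steps this reduces to ~cos(lat): ~1 at the equator, ~0.5 at 60 degrees latitude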
|
||||
|
||||
`an` <-
|
||||
function(x){return(as.numeric(x))}
|
||||
|
||||
|
||||
`findEndTacsat` <-
|
||||
function(tacsat
|
||||
,startTacsat #Starting point of VMS
|
||||
,interval #Specify in minutes, NULL means use all points
|
||||
,margin #Specify the margin, in minutes, by which it may deviate from the interval time
|
||||
){
|
||||
VMS <- tacsat
|
||||
if(!"SI_DATIM" %in% colnames(VMS)) VMS$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
|
||||
|
||||
startVMS <- startTacsat
|
||||
clStartVMS <- startVMS #Total VMS list starting point instead of subset use
|
||||
iShip <- VMS$VE_REF[startVMS]
|
||||
VMS. <- subset(VMS,VE_REF==iShip)
|
||||
startVMS <- which(VMS$VE_REF[startVMS] == VMS.$VE_REF & VMS$SI_DATIM[startVMS] == VMS.$SI_DATIM)
|
||||
if(clStartVMS != dim(VMS)[1]){
|
||||
if(VMS$VE_REF[clStartVMS] != VMS$VE_REF[clStartVMS+1]){
|
||||
#End of dataset reached
|
||||
endDataSet <- 1
|
||||
endVMS <- NA
|
||||
} else {
|
||||
#Calculate the difference in time between the starting VMS point and its succeeding points
|
||||
diffTime <- difftime(VMS.$SI_DATIM[(startVMS+1):dim(VMS.)[1]],VMS.$SI_DATIM[startVMS],units=c("mins"))
|
||||
if(length(which(diffTime >= (interval-margin) & diffTime <= (interval+margin)))==0){
|
||||
warning("No succeeding point found, no interpolation possible")
|
||||
endVMS <- NA
|
||||
#Check if end of dataset has been reached
|
||||
ifelse(all((diffTime < (interval-margin))==TRUE),endDataSet <- 1,endDataSet <- 0)
|
||||
} else {
|
||||
res <- which(diffTime >= (interval-margin) & diffTime <= (interval+margin))
|
||||
if(length(res)>1){
|
||||
res2 <- which.min(abs(interval-an(diffTime[res])))
|
||||
endVMS <- startVMS + res[res2]
|
||||
endDataSet <- 0
|
||||
} else {
|
||||
endVMS <- startVMS + res
|
||||
endDataSet <- 0
|
||||
}
|
||||
}
|
||||
#Built-in check
|
||||
if(is.na(endVMS)==FALSE){
|
||||
if(!an(difftime(VMS.$SI_DATIM[endVMS],VMS.$SI_DATIM[startVMS],units=c("mins"))) %in% seq((interval-margin),(interval+margin),1)) stop("found endVMS point not within interval range")
|
||||
endVMS <- clStartVMS + (endVMS - startVMS)
|
||||
}
|
||||
|
||||
}
|
||||
} else { endDataSet <- 1; endVMS <- NA}
|
||||
|
||||
return(c(endVMS,endDataSet))}
|
||||
|
||||
`interpolateTacsat` <-
|
||||
function(tacsat #VMS datapoints
|
||||
,interval=120 #Specify in minutes, NULL means use all points
|
||||
,margin=12 #Specify the margin in minutes that the interval might deviate in a search for the next point
|
||||
,res=100 #Resolution of interpolation method (default = 100)
|
||||
,method="cHs" #Specify the method to be used: Straight line (SL) or cubic Hermite spline (cHs)
|
||||
,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) #Specify the four parameters: fm, distscale, sigline and st (speed threshold)
|
||||
,headingAdjustment=0
|
||||
,fast=FALSE){
|
||||
|
||||
if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
|
||||
|
||||
#Start interpolating the data
|
||||
if(!method %in% c("cHs","SL")) stop("method selected that does not exist")
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#Fast method or not
|
||||
#-------------------------------------------------------------------------------
|
||||
if(fast){
|
||||
#Interpolation only by vessel, so split tacsat up
|
||||
tacsat$ID <- 1:nrow(tacsat)
|
||||
splitTa <- split(tacsat,tacsat$VE_REF)
|
||||
spltTaCon <- lapply(splitTa,function(spltx){
|
||||
#Calculate the time difference between every record
|
||||
dftimex <- outer(spltx$SI_DATIM,spltx$SI_DATIM,difftime,units="mins")
|
||||
iStep <- 1
|
||||
connect <- list()
|
||||
counter <- 1
|
||||
#Loop over all possible combinations and store if a connection can be made
|
||||
while(iStep <= nrow(spltx)){
|
||||
endp <- which(dftimex[,iStep] >= (interval - margin) & dftimex[,iStep] <= (interval + margin))
|
||||
if(length(endp)>0){
|
||||
if(length(endp)>1) endp <- endp[which.min(abs(interval - dftimex[endp,iStep]))][1]
|
||||
connect[[counter]] <- c(iStep,endp)
|
||||
counter <- counter + 1
|
||||
iStep <- endp
|
||||
} else { iStep <- iStep + 1}
|
||||
}
|
||||
#Return matrix of connections
|
||||
return(do.call(rbind,connect))})
|
||||
|
||||
if(method=="cHs") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){
|
||||
return(interCubicHermiteSpline(spltx=splitTa[[y]],spltCon=spltTaCon[[y]],res,params,headingAdjustment))}),recursive=FALSE)
|
||||
if(method=="SL") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){
|
||||
return(interStraightLine(splitTa[[y]],spltTaCon[[y]],res))}),recursive=FALSE)
|
||||
|
||||
} else {
|
||||
|
||||
|
||||
#Initiate returning result object
|
||||
returnInterpolations <- list()
|
||||
|
||||
#Start iterating over succeeding points
|
||||
for(iStep in 1:(dim(tacsat)[1]-1)){
|
||||
if(iStep == 1){
|
||||
iSuccess <- 0
|
||||
endDataSet <- 0
|
||||
startVMS <- 1
|
||||
ship <- tacsat$VE_REF[startVMS]
|
||||
} else {
|
||||
if(is.na(endVMS)==TRUE) endVMS <- startVMS + 1
|
||||
startVMS <- endVMS
|
||||
#-Check if the end of the dataset is reached
|
||||
if(endDataSet == 1 & rev(unique(tacsat$VE_REF))[1] != ship){
|
||||
startVMS <- which(tacsat$VE_REF == unique(tacsat$VE_REF)[which(unique(tacsat$VE_REF)==ship)+1])[1]
|
||||
ship <- tacsat$VE_REF[startVMS]
|
||||
endDataSet<- 0
|
||||
}
|
||||
if(endDataSet == 1 & rev(unique(tacsat$VE_REF))[1] == ship) endDataSet <- 2 #Final end of dataset
|
||||
}
|
||||
|
||||
#if end of dataset is not reached, try to find succeeding point
|
||||
if(endDataSet != 2){
|
||||
result <- findEndTacsat(tacsat,startVMS,interval,margin)
|
||||
endVMS <- result[1]
|
||||
endDataSet <- result[2]
|
||||
if(is.na(endVMS)==TRUE) int <- 0 #No interpolation possible
|
||||
if(is.na(endVMS)==FALSE) int <- 1 #Interpolation possible
|
||||
|
||||
#Interpolate according to the Cubic Hermite Spline method
|
||||
if(method == "cHs" & int == 1){
|
||||
|
||||
#Define the cHs formula
|
||||
F00 <- numeric()
|
||||
F10 <- numeric()
|
||||
F01 <- numeric()
|
||||
F11 <- numeric()
|
||||
i <- 0
|
||||
t <- seq(0,1,length.out=res)
|
||||
F00 <- 2*t^3 -3*t^2 + 1
|
||||
F10 <- t^3-2*t^2+t
|
||||
F01 <- -2*t^3+3*t^2
|
||||
F11 <- t^3-t^2
|
||||
|
||||
if (is.na(tacsat[startVMS,"SI_HE"])) tacsat[startVMS,"SI_HE"] <- 0
|
||||
if (is.na(tacsat[endVMS, "SI_HE"])) tacsat[endVMS, "SI_HE"] <- 0
|
||||
|
||||
#Heading at begin point in degrees
|
||||
Hx0 <- sin(tacsat[startVMS,"SI_HE"]/(180/pi))
|
||||
Hy0 <- cos(tacsat[startVMS,"SI_HE"]/(180/pi))
|
||||
#Heading at end point in degrees
|
||||
Hx1 <- sin(tacsat[endVMS-headingAdjustment,"SI_HE"]/(180/pi))
|
||||
Hy1 <- cos(tacsat[endVMS-headingAdjustment,"SI_HE"]/(180/pi))
|
||||
|
||||
Mx0 <- tacsat[startVMS, "SI_LONG"]
|
||||
Mx1 <- tacsat[endVMS, "SI_LONG"]
|
||||
My0 <- tacsat[startVMS, "SI_LATI"]
|
||||
My1 <- tacsat[endVMS, "SI_LATI"]
|
||||
|
||||
#Corrected for longitude-latitude effect
|
||||
Hx0 <- Hx0 * params$fm * tacsat[startVMS,"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
|
||||
Hx1 <- Hx1 * params$fm * tacsat[endVMS,"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1])
|
||||
Hy0 <- Hy0 * params$fm * lonLatRatio(tacsat[c(startVMS,endVMS),"SI_LONG"],tacsat[c(startVMS,endVMS),"SI_LATI"])[1] * tacsat[startVMS,"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1])
|
||||
Hy1 <- Hy1 * params$fm * lonLatRatio(tacsat[c(startVMS,endVMS),"SI_LONG"],tacsat[c(startVMS,endVMS),"SI_LATI"])[2] * tacsat[endVMS,"SI_SP"]/((params$st[2]-params$st[1]) /2+params$st[1])
|
||||
|
||||
#Finalizing the interpolation based on cHs
|
||||
fx <- numeric()
|
||||
fy <- numeric()
|
||||
fx <- F00*Mx0+F10*Hx0+F01*Mx1+F11*Hx1
|
||||
fy <- F00*My0+F10*Hy0+F01*My1+F11*Hy1
|
||||
|
||||
#Add one to list of successful interpolations
|
||||
iSuccess <- iSuccess + 1
|
||||
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))
|
||||
}
|
||||
|
||||
#Interpolate according to a straight line
|
||||
if(method == "SL" & int == 1){
|
||||
fx <- seq(tacsat$SI_LONG[startVMS],tacsat$SI_LONG[endVMS],length.out=res)
|
||||
fy <- seq(tacsat$SI_LATI[startVMS],tacsat$SI_LATI[endVMS],length.out=res)
|
||||
|
||||
#Add one to list of successful interpolations
|
||||
iSuccess <- iSuccess + 1
|
||||
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return(returnInterpolations)}
|
||||
|
||||
|
||||
cat("Loading Table\n")
|
||||
tacsatX <-read.table(inputFile,sep=",",header=T)
|
||||
cat("Adjusting Columns Types\n")
|
||||
tacsatX<-transform(tacsatX, VE_COU= as.character(VE_COU), VE_REF= as.character(VE_REF), SI_LATI= as.numeric(SI_LATI), SI_LONG= as.numeric(SI_LONG), SI_DATE= as.character(SI_DATE),SI_TIME= as.character(SI_TIME),SI_SP= as.numeric(SI_SP),SI_HE= as.numeric(SI_HE))
|
||||
tacsatX$SI_DATIM=NULL
|
||||
cat("Sorting dataset\n")
|
||||
tacsatS <- sortTacsat(tacsatX)
|
||||
tacsatCut<-tacsatS
|
||||
tacsatCut <- tacsatS[1:1000,]
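# NOTE: the assignment above overrides the previous line and restricts processing to the first 1000 records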
|
||||
|
||||
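# Illustrative fallback (assumption, not part of the original script): the parameters used in the
# calls below are expected to be injected by the calling environment; for standalone testing they
# could be given defaults mirroring those of interpolateTacsat() and interpolation2Tacsat().
if(!exists("interval"))          interval          <- 120     # minutes between pings
if(!exists("margin"))            margin            <- 12      # tolerance in minutes
if(!exists("res"))               res               <- 100     # interpolation resolution
if(!exists("method"))            method            <- "cHs"   # "cHs" or "SL"
if(!exists("fm"))                fm                <- 0.5
if(!exists("distscale"))         distscale         <- 20
if(!exists("sigline"))           sigline           <- 0.2
if(!exists("st"))                st                <- c(2,6)
if(!exists("headingAdjustment")) headingAdjustment <- 0
if(!exists("fast"))              fast              <- FALSE
if(!exists("npoints"))           npoints           <- 10
if(!exists("equalDist"))         equalDist         <- TRUE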
cat("Interpolating\n")
|
||||
interpolation <- interpolateTacsat(tacsatCut,interval=interval,margin=margin,res=res, method=method,params=list(fm=fm,distscale=distscale,sigline=sigline,st=st),headingAdjustment=headingAdjustment,fast=fast)
|
||||
cat("Reconstructing Dataset\n")
|
||||
tacsatInt <- interpolation2Tacsat(interpolation=interpolation,tacsat=tacsatCut,npoints=npoints,equalDist=equalDist)
|
||||
tacsatInt <- sortTacsat(tacsatInt)
|
||||
cat("Writing output file\n")
|
||||
write.csv(tacsatInt, outputFile, row.names=T)
|
||||
print(Sys.time())
|
||||
cat("All Done.\n")
|
|
@ -0,0 +1 @@
|
|||
HSPEN_MODELER=org.gcube.dataanalysis.ecoengine.modeling.SimpleModeler
|
|
@ -0,0 +1,4 @@
|
|||
HSPEN=org.gcube.dataanalysis.ecoengine.models.ModelHSPEN
|
||||
AQUAMAPSNN=org.gcube.dataanalysis.ecoengine.models.ModelAquamapsNN
|
||||
FEED_FORWARD_ANN=org.gcube.dataanalysis.ecoengine.models.FeedForwardNN
|
||||
FEED_FORWARD_ANN_FILE=org.gcube.dataanalysis.ecoengine.models.testing.FeedForwardNNFile
|
|
@ -0,0 +1,11 @@
|
|||
AQUAMAPS_SUITABLE=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitableNode
|
||||
AQUAMAPS_NATIVE=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsNativeNode
|
||||
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsNative2050Node
|
||||
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.executor.nodes.algorithms.AquamapsSuitable2050Node
|
||||
OCCURRENCES_MERGER=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceMergingNode
|
||||
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceIntersectionNode
|
||||
OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.executor.nodes.transducers.OccurrenceSubtractionNode
|
||||
LWR=org.gcube.dataanalysis.executor.nodes.algorithms.LWR
|
||||
BIONYM=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymFlexibleWorkflowTransducer
|
||||
BIONYM_BIODIV=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymBiodiv
|
||||
CMSY=org.gcube.dataanalysis.executor.nodes.algorithms.CMSY
|
File diff suppressed because it is too large
|
@ -0,0 +1,16 @@
|
|||
BIOCLIMATE_HSPEC=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPECTransducer
|
||||
BIOCLIMATE_HCAF=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHCAFTransducer
|
||||
BIOCLIMATE_HSPEN=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPENTransducer
|
||||
HCAF_INTERPOLATION=org.gcube.dataanalysis.ecoengine.transducers.InterpolationTransducer
|
||||
HCAF_FILTER=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.HcafFilter
|
||||
HSPEN_FILTER=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.HspenFilter
|
||||
ABSENCE_CELLS_FROM_AQUAMAPS=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.MarineAbsencePointsFromAquamapsDistribution
|
||||
PRESENCE_CELLS_GENERATION=org.gcube.dataanalysis.ecoengine.transducers.simplequeryexecutors.MarinePresencePoints
|
||||
OCCURRENCES_MERGER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsMerger
|
||||
OCCURRENCES_INTERSECTOR=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsIntersector
|
||||
OCCURRENCES_MARINE_TERRESTRIAL=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsInSeaOnEarth
|
||||
OCCURRENCES_DUPLICATES_DELETER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsDuplicatesDeleter
|
||||
OCCURRENCES_SUBTRACTION=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsSubtraction
|
||||
BIONYM=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymTransducer
|
||||
BIONYM_LOCAL=org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymLocalTransducer
|
||||
SGVM_INTERPOLATION=org.gcube.dataanalysis.executor.rscripts.SGVMS_Interpolation
|
|
@ -0,0 +1,13 @@
|
|||
ANOMALIES_DETECTION=DBSCAN,KMEANS,XMEANS
|
||||
CLASSIFICATION=FEED_FORWARD_A_N_N_DISTRIBUTION
|
||||
CLIMATE=BIOCLIMATE_HSPEC,BIOCLIMATE_HCAF,BIOCLIMATE_HSPEN,HCAF_INTERPOLATION
|
||||
CORRELATION_ANALYSIS=HRS
|
||||
DATA_CLUSTERING=DBSCAN,KMEANS,XMEANS
|
||||
FILTERING=HCAF_FILTER,HSPEN_FILTER
|
||||
FUNCTION_SIMULATION=FEED_FORWARD_A_N_N_DISTRIBUTION
|
||||
OCCURRENCES=ABSENCE_CELLS_FROM_AQUAMAPS,PRESENCE_CELLS_GENERATION,OCCURRENCES_MERGER,OCCURRENCES_INTERSECTOR,OCCURRENCES_MARINE_TERRESTRIAL,OCCURRENCES_DUPLICATES_DELETER,OCCURRENCES_SUBTRACTION
|
||||
PERFORMANCES_EVALUATION=QUALITY_ANALYSIS,DISCREPANCY_ANALYSIS
|
||||
SPECIES_SIMULATION=AQUAMAPS_SUITABLE,AQUAMAPS_NATIVE,AQUAMAPS_NATIVE_2050,AQUAMAPS_SUITABLE_2050,AQUAMAPS_NATIVE_NEURALNETWORK,AQUAMAPS_SUITABLE_NEURALNETWORK
|
||||
TRAINING=HSPEN,AQUAMAPSNN,FEED_FORWARD_ANN
|
||||
TIME_SERIES=HCAF_INTERPOLATION
|
||||
VESSELS=SGVM_INTERPOLATION
|
|
@ -0,0 +1,18 @@
|
|||
<?xml version="1.0" encoding="WINDOWS-1252" standalone="no"?>
|
||||
<jardesc>
|
||||
<jar path="ExecutorScriptFile/org.gcube.dataanalysis.executor.executorscriptplugin.jar"/>
|
||||
<options buildIfNeeded="true" compress="true" descriptionLocation="/ExecutorScriptFile/createscript.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
|
||||
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
|
||||
<selectedProjects/>
|
||||
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
|
||||
<sealing sealJar="false">
|
||||
<packagesToSeal/>
|
||||
<packagesToUnSeal/>
|
||||
</sealing>
|
||||
</manifest>
|
||||
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
|
||||
<javaElement handleIdentifier="=ExecutorScriptFile/src<org.gcube.dataanalysis.executor.plugin.dummy"/>
|
||||
<javaElement handleIdentifier="=ExecutorScriptFile/src<org.gcube.dataanalysis.executor.scripts"/>
|
||||
<javaElement handleIdentifier="=ExecutorScriptFile/src<org.gcube.dataanalysis.executor.plugin"/>
|
||||
</selectedElements>
|
||||
</jardesc>
|
|
@ -0,0 +1,16 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<jardesc>
|
||||
<jar path="C:/Users/coro/Desktop/EcologicalEngineExecutor-1.4.0-SNAPSHOT.jar"/>
|
||||
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/deployDesktop.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
|
||||
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
|
||||
<selectedProjects/>
|
||||
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
|
||||
<sealing sealJar="false">
|
||||
<packagesToSeal/>
|
||||
<packagesToUnSeal/>
|
||||
</sealing>
|
||||
</manifest>
|
||||
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
|
||||
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java"/>
|
||||
</selectedElements>
|
||||
</jardesc>
|
|
@ -0,0 +1,16 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<jardesc>
|
||||
<jar path="EcologicalEngineExecutor/PARALLEL_PROCESSING/EcologicalEngineExecutor-1.2.0-SNAPSHOT.jar"/>
|
||||
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngineExecutor/deployParallelProcessing.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
|
||||
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
|
||||
<selectedProjects/>
|
||||
<manifest generateManifest="true" manifestLocation="" manifestVersion="1.0" reuseManifest="false" saveManifest="false" usesManifest="true">
|
||||
<sealing sealJar="false">
|
||||
<packagesToSeal/>
|
||||
<packagesToUnSeal/>
|
||||
</sealing>
|
||||
</manifest>
|
||||
<selectedElements exportClassFiles="true" exportJavaFiles="false" exportOutputFolder="false">
|
||||
<javaElement handleIdentifier="=EcologicalEngineExecutor/src\/main\/java"/>
|
||||
</selectedElements>
|
||||
</jardesc>
|
|
@ -0,0 +1,2 @@
|
|||
v. 1.0.0 (20-04-2011)
|
||||
* First release
|
|
@ -0,0 +1 @@
|
|||
Used as a library in the gCube Framework
|
|
@ -0,0 +1,8 @@
|
|||
gCube System - License
|
||||
------------------------------------------------------------
|
||||
|
||||
The gCube/gCore software is licensed as Free Open Source software conveying to
|
||||
the EUPL (http://ec.europa.eu/idabc/eupl).
|
||||
The software and documentation is provided by its authors/distributors "as is"
|
||||
and no expressed or implied warranty is given for its use, quality or fitness
|
||||
for a particular case.
|
|
@ -0,0 +1,2 @@
|
|||
Gianpaolo Coro (gianpaolo.coro@isti.cnr.it), CNR Pisa,
|
||||
Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo"
|
|
@ -0,0 +1,42 @@
|
|||
The gCube System - Ecological Engine Library
|
||||
------------------------------------------------------------
|
||||
|
||||
This work is partially funded by the European Commission in the
|
||||
context of the D4Science project (www.d4science.eu), under the
|
||||
1st call of FP7 IST priority.
|
||||
|
||||
Authors
|
||||
-------
|
||||
|
||||
* Gianpaolo Coro (gianpaolo.coro@isti.cnr.it), CNR Pisa,
|
||||
Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo"
|
||||
|
||||
Version and Release Date
|
||||
------------------------
|
||||
|
||||
version 1.0.0 (22-06-2012)
|
||||
|
||||
Description
|
||||
--------------------
|
||||
|
||||
Support library for statistics analysis on Time Series data.
|
||||
|
||||
|
||||
Download information
|
||||
--------------------
|
||||
|
||||
Source code is available from SVN:
|
||||
http://svn.research-infrastructures.eu/d4science/gcube/trunk/content-management/EcologicalModelling
|
||||
|
||||
Binaries can be downloaded from:
|
||||
http://software.d4science.research-infrastructures.eu/
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
VREManager documentation is available on-line from the Projects Documentation Wiki:
|
||||
https://gcube.wiki.gcube-system.org/gcube/index.php/Ecological_Modeling
|
||||
|
||||
Licensing
|
||||
---------
|
||||
|
||||
This software is licensed under the terms you may find in the file named "LICENSE" in this directory.
|
|
@ -0,0 +1,7 @@
|
|||
<ReleaseNotes xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:noNamespaceSchemaLocation="xsd/changelog.xsd">
|
||||
<Changeset component="org.gcube.data-analysis.ecological-engine-executor.1-0-0"
|
||||
date="2012-02-23">
|
||||
<Change>First Release</Change>
|
||||
</Changeset>
|
||||
</ReleaseNotes>
|
|
@ -0,0 +1,42 @@
|
|||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||
<id>servicearchive</id>
|
||||
<formats>
|
||||
<format>tar.gz</format>
|
||||
</formats>
|
||||
<baseDirectory>/</baseDirectory>
|
||||
<fileSets>
|
||||
<fileSet>
|
||||
<directory>${distroDirectory}</directory>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useDefaultExcludes>true</useDefaultExcludes>
|
||||
<includes>
|
||||
<include>README</include>
|
||||
<include>LICENSE</include>
|
||||
<include>INSTALL</include>
|
||||
<include>MAINTAINERS</include>
|
||||
<include>changelog.xml</include>
|
||||
</includes>
|
||||
<fileMode>755</fileMode>
|
||||
<filtered>true</filtered>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
<files>
|
||||
<file>
|
||||
<source>${distroDirectory}/profile.xml</source>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<filtered>true</filtered>
|
||||
</file>
|
||||
<file>
|
||||
<source>target/${build.finalName}.jar</source>
|
||||
<outputDirectory>/${artifactId}</outputDirectory>
|
||||
</file>
|
||||
<file>
|
||||
<source>${distroDirectory}/svnpath.txt</source>
|
||||
<outputDirectory>/${artifactId}</outputDirectory>
|
||||
<filtered>true</filtered>
|
||||
</file>
|
||||
</files>
|
||||
</assembly>
|
|
@ -0,0 +1,25 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<ID></ID>
|
||||
<Type>Library</Type>
|
||||
<Profile>
|
||||
<Description>Ecological Engine Executor Library</Description>
|
||||
<Class>EcologicalEngineExecutor</Class>
|
||||
<Name>${artifactId}</Name>
|
||||
<Version>1.0.0</Version>
|
||||
<Packages>
|
||||
<Software>
|
||||
<Name>${artifactId}</Name>
|
||||
<Version>${version}</Version>
|
||||
<MavenCoordinates>
|
||||
<groupId>${groupId}</groupId>
|
||||
<artifactId>${artifactId}</artifactId>
|
||||
<version>${version}</version>
|
||||
</MavenCoordinates>
|
||||
<Files>
|
||||
<File>${build.finalName}.jar</File>
|
||||
</Files>
|
||||
</Software>
|
||||
</Packages>
|
||||
</Profile>
|
||||
</Resource>
|
|
@ -0,0 +1 @@
|
|||
https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine
|
|
@ -0,0 +1,28 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<ID/>
|
||||
<Type>Service</Type>
|
||||
<Profile>
|
||||
<Description>A Plugin Executing S.O. Scripts</Description>
|
||||
<Class>ExecutorPlugins</Class>
|
||||
<Name>ExecutorScript</Name>
|
||||
<Version>1.0.0</Version>
|
||||
<Packages>
|
||||
<Plugin>
|
||||
<Name>plugin</Name>
|
||||
<Version>1.0.0</Version>
|
||||
<TargetService>
|
||||
<Service>
|
||||
<Class>VREManagement</Class>
|
||||
<Name>Executor</Name>
|
||||
<Version>1.1.0</Version>
|
||||
</Service>
|
||||
<Package>main</Package>
|
||||
<Version>1.0.0</Version>
|
||||
</TargetService>
|
||||
<EntryPoint>org.gcube.dataanalysis.executor.plugin.ScriptPluginContext</EntryPoint>
|
||||
<Files><File>org.gcube.dataanalysis.executor.executorscriptplugin.jar</File></Files>
|
||||
</Plugin>
|
||||
</Packages>
|
||||
</Profile>
|
||||
</Resource>
|
|
@ -0,0 +1,28 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<ID/>
|
||||
<Type>Service</Type>
|
||||
<Profile>
|
||||
<Description>A Plugin for Executing O.S. Scripts</Description>
|
||||
<Class>ExecutorPlugins</Class>
|
||||
<Name>ExecutorScript</Name>
|
||||
<Version>1.0.0</Version>
|
||||
<Packages>
|
||||
<Plugin>
|
||||
<Name>plugin</Name>
|
||||
<Version>1.0.0</Version>
|
||||
<TargetService>
|
||||
<Service>
|
||||
<Class>VREManagement</Class>
|
||||
<Name>Executor</Name>
|
||||
<Version>1.1.0</Version>
|
||||
</Service>
|
||||
<Package>main</Package>
|
||||
<Version>1.0.0</Version>
|
||||
</TargetService>
|
||||
<EntryPoint>org.gcube.dataanalysis.executor.plugin.ScriptPluginContext</EntryPoint>
|
||||
<Files><File>org.gcube.dataanalysis.executor.executorscriptplugin.jar</File></Files>
|
||||
</Plugin>
|
||||
</Packages>
|
||||
</Profile>
|
||||
</Resource>
|
Binary file not shown.
|
@ -0,0 +1,138 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>maven-parent</artifactId>
|
||||
<groupId>org.gcube.tools</groupId>
|
||||
<version>1.0.0</version>
|
||||
<relativePath />
|
||||
</parent>
|
||||
<groupId>org.gcube.dataanalysis</groupId>
|
||||
<artifactId>EcologicalEngineExecutor</artifactId>
|
||||
<version>1.6.4-SNAPSHOT</version>
|
||||
<name>ecological-engine-executor</name>
|
||||
<description>ecological-engine-executor</description>
|
||||
<properties>
|
||||
<distroDirectory>${project.basedir}/distro</distroDirectory>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.gcube.resourcemanagement</groupId>
|
||||
<artifactId>executor-service</artifactId>
|
||||
<version>[1.2.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.gcube.contentmanagement</groupId>
|
||||
<artifactId>storage-manager-core</artifactId>
|
||||
<version>[2.0.2-SNAPSHOT,4.0.0)</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.gcube.contentmanagement</groupId>
|
||||
<artifactId>storage-manager-wrapper</artifactId>
|
||||
<version>[2.0.2-SNAPSHOT,4.0.0)</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>activemq-core</artifactId>
|
||||
<version>5.6.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.gcube.core</groupId>
|
||||
<artifactId>gcf</artifactId>
|
||||
<version>[1.4.1,2.0.0)</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.gcube.dataanalysis</groupId>
|
||||
<artifactId>ecological-engine</artifactId>
|
||||
<version>[1.8.0-SNAPSHOT,2.0.0)</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.gcube.informationsystem</groupId>
|
||||
<artifactId>is-client</artifactId>
|
||||
<version>[1.5.1,1.6.0]</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.gcube.informationsystem</groupId>
|
||||
<artifactId>is-collector-stubs</artifactId>
|
||||
<version>[3.0.0-SNAPSHOT, 3.1.0)</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.gcube.core</groupId>
|
||||
<artifactId>common-scope</artifactId>
|
||||
<version>[1.2.0-SNAPSHOT,3.0.0)</version>
|
||||
</dependency>
|
||||
<!-- <dependency>
|
||||
<groupId>org.apache.jcs</groupId>
|
||||
<artifactId>jcs</artifactId>
|
||||
<version>1.3</version>
|
||||
</dependency>-->
|
||||
<!-- <dependency> <groupId>org.gcube.dataanalysis</groupId> <artifactId>generic-worker</artifactId>
|
||||
<version>1.2.0-SNAPSHOT</version> <type>jar</type> <scope>compile</scope>
|
||||
<exclusions> <exclusion> <artifactId>common-scope</artifactId> <groupId>org.gcube.core</groupId>
|
||||
</exclusion> </exclusions> </dependency> -->
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.6</source>
|
||||
<target>1.6</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>2.12</version>
|
||||
<configuration>
|
||||
<skipTests>true</skipTests>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<version>2.5</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>copy-profile</id>
|
||||
<phase>install</phase>
|
||||
<goals>
|
||||
<goal>copy-resources</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<outputDirectory>target</outputDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>${distroDirectory}</directory>
|
||||
<filtering>true</filtering>
|
||||
<includes>
|
||||
<include>profile.xml</include>
|
||||
</includes>
|
||||
</resource>
|
||||
</resources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.2</version>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>${distroDirectory}/descriptor.xml</descriptor>
|
||||
</descriptors>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>servicearchive</id>
|
||||
<phase>install</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
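Note on the build configuration above (an assumption based on the plugin bindings, not stated in the commit): since both the copy-profile and servicearchive executions are bound to the install phase and surefire is configured with skipTests=true, running `mvn install` should produce the project jar, a filtered target/profile.xml, and the servicearchive tar.gz assembled from ${distroDirectory}/descriptor.xml, without running unit tests.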
|
|
@ -0,0 +1,148 @@
|
|||
package org.gcube.dataanalysis.executor.generators;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
|
||||
import org.gcube.dataanalysis.ecoengine.interfaces.ActorNode;
|
||||
import org.gcube.dataanalysis.ecoengine.interfaces.Generator;
|
||||
import org.gcube.dataanalysis.ecoengine.interfaces.GenericAlgorithm;
|
||||
import org.gcube.dataanalysis.executor.job.management.DistributedProcessingAgent;
|
||||
|
||||
public class D4ScienceDistributedProcessing implements Generator {
|
||||
public static int maxMessagesAllowedPerJob = 20;
|
||||
public static boolean forceUpload = true;
|
||||
public static String defaultContainerFolder = "PARALLEL_PROCESSING";
|
||||
protected AlgorithmConfiguration config;
|
||||
protected ActorNode distributedModel;
|
||||
protected String mainclass;
|
||||
DistributedProcessingAgent agent;
|
||||
|
||||
public D4ScienceDistributedProcessing(){
|
||||
}
|
||||
|
||||
public D4ScienceDistributedProcessing(AlgorithmConfiguration config) {
|
||||
this.config = config;
|
||||
|
||||
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
|
||||
}
|
||||
|
||||
public void compute() throws Exception {
|
||||
try {
|
||||
agent.compute();
|
||||
distributedModel.postProcess(agent.hasResentMessages(),false);
|
||||
} catch (Exception e) {
|
||||
distributedModel.postProcess(false,true);
|
||||
AnalysisLogger.getLogger().error("ERROR: An Error occurred ", e);
|
||||
throw e;
|
||||
} finally {
|
||||
shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<StatisticalType> getInputParameters() {
|
||||
|
||||
List<StatisticalType> distributionModelParams = new ArrayList<StatisticalType>();
|
||||
distributionModelParams.add(new ServiceType(ServiceParameters.USERNAME,"ServiceUserName","The final user Name"));
|
||||
|
||||
return distributionModelParams;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getResources() {
|
||||
return agent.getResources();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getStatus() {
|
||||
return agent.getStatus();
|
||||
}
|
||||
|
||||
@Override
|
||||
public StatisticalType getOutput() {
|
||||
return distributedModel.getOutput();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ALG_PROPS[] getSupportedAlgorithms() {
|
||||
ALG_PROPS[] p = { ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
|
||||
return p;
|
||||
}
|
||||
|
||||
@Override
|
||||
public INFRASTRUCTURE getInfrastructure() {
|
||||
return INFRASTRUCTURE.D4SCIENCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() throws Exception {
|
||||
|
||||
Properties p = AlgorithmConfiguration.getProperties(config.getConfigPath() + AlgorithmConfiguration.nodeAlgorithmsFile);
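// Illustrative example (hypothetical entry, not part of the original commit): the file referenced by
// AlgorithmConfiguration.nodeAlgorithmsFile is expected to map an algorithm/model name to the fully
// qualified class of its node implementation, e.g.
//   SOME_ALGORITHM=org.gcube.dataanalysis.executor.nodes.SomeNodeAlgorithm
// The key and class above are placeholders used only to show the expected format.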
|
||||
String model = config.getModel();
|
||||
String algorithm = null;
|
||||
if ((model!=null) && (model.length()>0))
|
||||
algorithm = model;
|
||||
else
|
||||
algorithm=config.getAgent();
|
||||
|
||||
mainclass = p.getProperty(algorithm);
|
||||
distributedModel = (ActorNode) Class.forName(mainclass).newInstance();
|
||||
distributedModel.setup(config);
|
||||
String scope = config.getGcubeScope();
|
||||
AnalysisLogger.getLogger().info("Using the following scope for the computation:"+scope);
|
||||
String owner = config.getParam("ServiceUserName");
|
||||
int leftNum = distributedModel.getNumberOfLeftElements();
|
||||
int rightNum = distributedModel.getNumberOfRightElements();
|
||||
agent = new DistributedProcessingAgent(config, scope, owner, mainclass, config.getPersistencePath(), algorithm, defaultContainerFolder, maxMessagesAllowedPerJob, forceUpload, leftNum, rightNum);
|
||||
agent.setLogger(AnalysisLogger.getLogger());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setConfiguration(AlgorithmConfiguration config) {
|
||||
this.config = config;
|
||||
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
try {
|
||||
agent.shutdown();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
try {
|
||||
distributedModel.stop();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLoad() {
|
||||
return agent.getLoad();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getResourceLoad() {
|
||||
return agent.getResourceLoad();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public GenericAlgorithm getAlgorithm() {
|
||||
return distributedModel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "A D4Science Cloud Processor for Species Distributions";
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,293 @@
|
|||
package org.gcube.dataanalysis.executor.job.management;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.axis.message.addressing.Address;
|
||||
import org.apache.axis.message.addressing.EndpointReferenceType;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.gcube.contentmanagement.graphtools.utils.HttpRequest;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.FileTools;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
|
||||
import org.gcube.dataanalysis.ecoengine.connectors.livemonitor.ResourceLoad;
|
||||
import org.gcube.dataanalysis.ecoengine.connectors.livemonitor.Resources;
|
||||
import org.gcube.dataanalysis.ecoengine.utils.Operations;
|
||||
|
||||
import com.thoughtworks.xstream.XStream;
|
||||
|
||||
public class DistributedProcessingAgent {
|
||||
|
||||
protected QueueJobManager jobManager;
|
||||
protected boolean deletefiles = true;
|
||||
protected String mainclass;
|
||||
public int maxElementsAllowedPerJob = 20;
|
||||
protected boolean forceUpload = true;
|
||||
protected boolean stop;
|
||||
protected String gscope;
|
||||
protected String userName;
|
||||
protected String pathToLib;
|
||||
protected String modelName;
|
||||
protected String containerFolder;
|
||||
protected Serializable configurationFile;
|
||||
protected int rightSetNumberOfElements;
|
||||
protected int leftSetNumberOfElements;
|
||||
protected List<String> endpoints;
|
||||
protected int subdivisiondiv;
|
||||
|
||||
protected static String defaultJobOutput = "execution.output";
|
||||
protected static String defaultScriptFile = "script";
|
||||
protected Logger logger;
|
||||
|
||||
/**
|
||||
* A distributed processing agent. Performs a distributed computation as a map over the product of two sets, A and B.
|
||||
* Splits over B: A x B1, A x B2, ..., A x Bn
|
||||
* Prepares a script to be executed on remote nodes
|
||||
* The computation is then sent to remote processors.
|
||||
*/
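// Illustrative sketch of the splitting described above (the numbers are assumptions, not taken from the code):
// given a left set A of 1000 species and a right set B of 90 cells split into chunks of 30, the agent issues
// three independent jobs, A x B1, A x B2 and A x B3, each mapping the whole of A against one 30-cell slice
// of B; the caller then post-processes the partial results.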
|
||||
public DistributedProcessingAgent(Serializable configurationFile,
|
||||
String gCubeScope,
|
||||
String computationOwner,
|
||||
String mainClass,
|
||||
String pathToLibFolder,
|
||||
String modelName,
|
||||
String containerFolder,
|
||||
int maxElementsPerJob,
|
||||
boolean forceReUploadofLibs,
|
||||
int leftSetNumberOfElements,
|
||||
int rightSetNumberOfElements
|
||||
) {
|
||||
this.stop = false;
|
||||
this.deletefiles = true;
|
||||
this.gscope=gCubeScope;
|
||||
this.mainclass=mainClass;
|
||||
this.maxElementsAllowedPerJob=maxElementsPerJob;
|
||||
this.forceUpload=forceReUploadofLibs;
|
||||
this.configurationFile=configurationFile;
|
||||
this.rightSetNumberOfElements=rightSetNumberOfElements;
|
||||
this.leftSetNumberOfElements=leftSetNumberOfElements;
|
||||
this.userName=computationOwner;
|
||||
this.pathToLib=pathToLibFolder;
|
||||
this.modelName=modelName;
|
||||
this.containerFolder=containerFolder;
|
||||
}
|
||||
|
||||
public void setLogger(Logger logger){
|
||||
this.logger=logger;
|
||||
}
|
||||
|
||||
public void setEndPoints(List<String> endpoints){
|
||||
this.endpoints=endpoints;
|
||||
}
|
||||
|
||||
public boolean hasResentMessages(){
|
||||
return jobManager.hasResentMessages();
|
||||
}
|
||||
|
||||
public void compute() throws Exception {
|
||||
try {
|
||||
if (logger == null){
|
||||
logger = AnalysisLogger.getLogger();
|
||||
}
|
||||
if (gscope == null)
|
||||
throw new Exception("Null Scope");
|
||||
AnalysisLogger.getLogger().debug("SCOPE: "+gscope);
|
||||
if (endpoints != null) {
|
||||
List<EndpointReferenceType> eprtList = new ArrayList<EndpointReferenceType>();
|
||||
for (String ep : endpoints) {
|
||||
eprtList.add(new EndpointReferenceType(new Address(ep)));
|
||||
}
|
||||
jobManager = new QueueJobManager(gscope, endpoints.size(), eprtList);
|
||||
} else
|
||||
jobManager = new QueueJobManager(gscope, 1);
|
||||
|
||||
int numberOfResources = jobManager.getNumberOfNodes();
|
||||
// we split along the right dimension, so if there are fewer elements than nodes the number of nodes should be reduced
|
||||
if (numberOfResources > 0) {
|
||||
// chunk the number of species in order to lower the computational effort of the workers
|
||||
subdivisiondiv = rightSetNumberOfElements / (numberOfResources * maxElementsAllowedPerJob);
|
||||
int rest = rightSetNumberOfElements % (numberOfResources * maxElementsAllowedPerJob);
|
||||
if (rest > 0)
|
||||
subdivisiondiv++;
|
||||
if (subdivisiondiv == 0)
|
||||
subdivisiondiv = 1;
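// Worked example (illustrative values, not from the code): with rightSetNumberOfElements = 250,
// numberOfResources = 1 and maxElementsAllowedPerJob = 20, subdivisiondiv = 250 / 20 = 12 with a
// remainder of 10, so it is incremented to 13: the right set is processed in 13 chunks of at most
// 20 elements each.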
|
||||
|
||||
executeWork(leftSetNumberOfElements, rightSetNumberOfElements, 0, subdivisiondiv, deletefiles, forceUpload);
|
||||
|
||||
if (jobManager.wasAborted()) {
|
||||
logger.debug("Warning: Job was aborted");
|
||||
// distributionModel.postProcess(false,true);
|
||||
throw new Exception("Job System Error");
|
||||
}
|
||||
else{
|
||||
//postprocess
|
||||
// distributionModel.postProcess(jobManager.hasResentMessages(),false);
|
||||
}
|
||||
|
||||
} else {
|
||||
logger.debug("Warning: No Workers available");
|
||||
throw new Exception("No Workers available");
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("ERROR: An Error occurred ", e);
|
||||
e.printStackTrace();
|
||||
throw e;
|
||||
} finally {
|
||||
shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
private void executeWork(int leftNum, int rightNum, int offset, int numberOfResources, boolean deletefiles, boolean forceUpload) throws Exception {
|
||||
|
||||
String owner = userName;
|
||||
|
||||
int[] chunkSizes = Operations.takeChunks(rightNum, numberOfResources);
|
||||
List<String> arguments = new ArrayList<String>();
|
||||
// chunk with respect to the cells: each node takes a chunk of cells vs all the species
|
||||
|
||||
for (int i = 0; i < chunkSizes.length; i++) {
|
||||
String argumentString = "0 " + leftNum + " " + offset + " " + chunkSizes[i] + " ./ "+mainclass;
|
||||
arguments.add(argumentString);
|
||||
offset += chunkSizes[i];
|
||||
logger.debug("Generator-> Argument " + i + ": " + argumentString);
|
||||
}
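// Example of a generated argument string (the class name is a hypothetical placeholder): with
// leftNum = 1000, offset = 0 and a chunk size of 30, the first worker would receive
//   "0 1000 0 30 ./ org.gcube.dataanalysis.executor.nodes.SomeNodeAlgorithm"
// i.e. left-set start and size, right-set offset and chunk size, the working directory and the main class.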
|
||||
|
||||
if (owner == null)
|
||||
throw new Exception("Null Owner");
|
||||
|
||||
String pathToDir = new File (pathToLib, containerFolder).getAbsolutePath();
|
||||
|
||||
if (!(new File(pathToDir).exists()))
|
||||
throw new Exception("No Implementation of node-model found for algorithm " + pathToDir);
|
||||
|
||||
if (mainclass == null)
|
||||
throw new Exception("No mainClass found for algorithm " + pathToDir);
|
||||
|
||||
buildScriptFile(modelName, defaultJobOutput, pathToDir, mainclass);
|
||||
|
||||
jobManager.uploadAndExecuteChunkized(AlgorithmConfiguration.StatisticalManagerClass, AlgorithmConfiguration.StatisticalManagerService, owner, pathToDir, "/" + modelName + "/", "./", getScriptName(mainclass), arguments, new XStream().toXML(configurationFile), deletefiles, forceUpload);
|
||||
|
||||
}
|
||||
|
||||
private String getScriptName(String fullMainClass){
|
||||
String scriptName = defaultScriptFile+"_"+fullMainClass.substring(fullMainClass.lastIndexOf(".")+1)+".sh";
|
||||
return scriptName;
|
||||
}
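// Example (hypothetical class name): for fullMainClass "org.gcube.dataanalysis.executor.nodes.SomeNodeAlgorithm"
// getScriptName returns "script_SomeNodeAlgorithm.sh".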
|
||||
// builds a job.sh
|
||||
public void buildScriptFile(String jobName, String jobOutput, String jarsPath, String fullMainClass) throws Exception {
|
||||
File expectedscript = new File(jarsPath,getScriptName(fullMainClass));
|
||||
if (!expectedscript.exists()) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("#!/bin/sh\n");
|
||||
sb.append("# " + jobName + "\n");
|
||||
sb.append("cd $1\n");
|
||||
sb.append("\n");
|
||||
sb.append("java -Xmx1024M -classpath ./:");
|
||||
File jarsPathF = new File(jarsPath);
|
||||
File[] files = jarsPathF.listFiles();
|
||||
|
||||
for (File jar : files) {
|
||||
|
||||
if (jar.getName().endsWith(".jar")) {
|
||||
sb.append("./" + jar.getName());
|
||||
sb.append(":");
|
||||
}
|
||||
}
|
||||
|
||||
sb.deleteCharAt(sb.length() - 1);
|
||||
sb.append(" " + fullMainClass + " $2 " + jobOutput);
|
||||
sb.append("\n");
|
||||
|
||||
AnalysisLogger.getLogger().trace("D4ScienceGenerator->Generating script in " + expectedscript.getAbsolutePath());
|
||||
FileTools.saveString(expectedscript.getAbsolutePath(), sb.toString(), true, "UTF-8");
|
||||
}
|
||||
AnalysisLogger.getLogger().trace("D4ScienceGenerator->Script " + expectedscript.getAbsolutePath()+" yet exists!");
|
||||
}
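// For reference, a script generated by buildScriptFile above would look roughly as follows (the jar and
// class names are hypothetical; only the structure is dictated by the code, and generation is skipped if
// the script already exists):
//   #!/bin/sh
//   # MyModel
//   cd $1
//
//   java -Xmx1024M -classpath ./:./worker.jar:./commons.jar org.gcube.dataanalysis.executor.nodes.SomeNodeAlgorithm $2 execution.output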
|
||||
|
||||
public String getResources() {
|
||||
Resources res = new Resources();
|
||||
try {
|
||||
int activeNodes = jobManager.getActiveNodes();
|
||||
for (int i = 0; i < activeNodes; i++) {
|
||||
try {
|
||||
res.addResource("Worker_" + (i + 1), 100);
|
||||
} catch (Exception e1) {
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
AnalysisLogger.getLogger().debug("D4ScienceGenerator->active nodes not ready");
|
||||
}
|
||||
if ((res != null) && (res.list != null))
|
||||
return HttpRequest.toJSon(res.list).replace("resId", "resID");
|
||||
else
|
||||
return "";
|
||||
}
|
||||
|
||||
public float getStatus() {
|
||||
try {
|
||||
if (stop)
|
||||
return 100f;
|
||||
else
|
||||
if (jobManager!=null)
|
||||
return Math.max(0.5f, jobManager.getStatus() * 100f);
|
||||
else
|
||||
return 0;
|
||||
} catch (Exception e) {
|
||||
return 0f;
|
||||
}
|
||||
}
|
||||
|
||||
public ALG_PROPS[] getSupportedAlgorithms() {
|
||||
ALG_PROPS[] p = { ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
|
||||
return p;
|
||||
}
|
||||
|
||||
public INFRASTRUCTURE getInfrastructure() {
|
||||
return INFRASTRUCTURE.D4SCIENCE;
|
||||
}
|
||||
|
||||
public void shutdown() {
|
||||
|
||||
try {
|
||||
jobManager.stop();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
stop = true;
|
||||
}
|
||||
|
||||
public String getLoad() {
|
||||
long tk = System.currentTimeMillis();
|
||||
ResourceLoad rs = null;
|
||||
if (jobManager!=null)
|
||||
rs = new ResourceLoad(tk, jobManager.currentNumberOfStages*subdivisiondiv);
|
||||
else
|
||||
rs = new ResourceLoad(tk, 0);
|
||||
return rs.toString();
|
||||
}
|
||||
|
||||
private long lastTime;
|
||||
private int lastProcessed;
|
||||
public String getResourceLoad() {
|
||||
long thisTime = System.currentTimeMillis();
|
||||
int processedRecords = 0;
|
||||
if ((jobManager!=null) && (subdivisiondiv>0))
|
||||
processedRecords = jobManager.currentNumberOfStages*subdivisiondiv;
|
||||
|
||||
int estimatedProcessedRecords = 0;
|
||||
if (processedRecords == lastProcessed) {
|
||||
estimatedProcessedRecords = Math.round(((float) thisTime * (float) lastProcessed) / (float) lastTime);
|
||||
} else {
|
||||
lastProcessed = processedRecords;
|
||||
estimatedProcessedRecords = lastProcessed;
|
||||
}
|
||||
lastTime = thisTime;
|
||||
ResourceLoad rs = new ResourceLoad(thisTime, estimatedProcessedRecords);
|
||||
return rs.toString();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,821 @@
|
|||
package org.gcube.dataanalysis.executor.job.management;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Timer;
|
||||
import java.util.TimerTask;
|
||||
import java.util.UUID;
|
||||
|
||||
import javax.jms.ExceptionListener;
|
||||
import javax.jms.JMSException;
|
||||
import javax.jms.Message;
|
||||
import javax.jms.MessageListener;
|
||||
|
||||
import org.apache.activemq.ActiveMQConnection;
|
||||
import org.apache.axis.message.addressing.EndpointReferenceType;
|
||||
import org.gcube.common.core.contexts.GHNContext;
|
||||
import org.gcube.common.core.informationsystem.client.AtomicCondition;
|
||||
import org.gcube.common.core.informationsystem.client.ISClient;
|
||||
import org.gcube.common.core.informationsystem.client.RPDocument;
|
||||
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
|
||||
import org.gcube.common.core.scope.GCUBEScope;
|
||||
import org.gcube.common.scope.api.ScopeProvider;
|
||||
import org.gcube.contentmanagement.blobstorage.resource.StorageObject;
|
||||
import org.gcube.contentmanagement.blobstorage.service.IClient;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
|
||||
import org.gcube.contentmanager.storageclient.wrapper.MemoryType;
|
||||
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
|
||||
import org.gcube.dataanalysis.ecoengine.utils.Operations;
|
||||
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
|
||||
import org.gcube.dataanalysis.executor.messagequeue.Consumer;
|
||||
import org.gcube.dataanalysis.executor.messagequeue.Producer;
|
||||
import org.gcube.dataanalysis.executor.messagequeue.QCONSTANTS;
|
||||
import org.gcube.dataanalysis.executor.messagequeue.QueueManager;
|
||||
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
|
||||
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
|
||||
import org.gcube.vremanagement.executor.stubs.TaskCall;
|
||||
import org.gcube.vremanagement.executor.stubs.TaskProxy;
|
||||
|
||||
public class QueueJobManager {
|
||||
|
||||
// broadcast message period
|
||||
public static int broadcastTimePeriod = 120000;
|
||||
// max silence before computation stops
|
||||
public static int maxSilenceTimeBeforeComputationStop = 10800000;
|
||||
// max number of retries per computation step
|
||||
public static int maxNumberOfComputationRetries = 1;
|
||||
// period for controlling a node activity
|
||||
public static int computationWatcherTimerPeriod = 120000;
|
||||
// max number of message to put in a queue
|
||||
// protected static int maxNumberOfMessages = 20;
|
||||
public static int maxNumberOfStages = Integer.MAX_VALUE;//10;
|
||||
// timeout for resending a message
|
||||
public static int queueWatcherMaxwaitingTime = QCONSTANTS.refreshStatusTime;// * 5;
|
||||
|
||||
protected int maxFailureTries;
|
||||
private static String pluginName = "generic-worker";//"GenericWorker";
|
||||
|
||||
protected String scope;
|
||||
protected GCUBEScope gscope;
|
||||
protected String session;
|
||||
|
||||
protected boolean yetstopped;
|
||||
protected boolean messagesresent;
|
||||
protected float status;
|
||||
protected boolean abort;
|
||||
protected boolean shutdown;
|
||||
|
||||
protected List<EndpointReferenceType> eprs;
|
||||
protected int activeNodes;
|
||||
protected int computingNodes;
|
||||
protected int numberOfMessages;
|
||||
protected int totalNumberOfMessages;
|
||||
protected int actualNumberOfNodes;
|
||||
protected int totalNumberOfStages;
|
||||
public int currentNumberOfStages;
|
||||
|
||||
// files management
|
||||
protected List<String> filenames;
|
||||
protected List<String> fileurls;
|
||||
|
||||
// queue parameters
|
||||
protected String queueName;
|
||||
protected String queueResponse;
|
||||
protected String queueURL;
|
||||
protected String queueUSER;
|
||||
protected String queuePWD;
|
||||
protected org.gcube.dataanalysis.executor.messagequeue.Consumer consumer;
|
||||
protected Producer producer;
|
||||
|
||||
Timer broadcastTimer;
|
||||
Timer computationWatcherTimer;
|
||||
ComputationTimerWatcher computationWatcher;
|
||||
String serviceClass;
|
||||
String serviceName;
|
||||
String owner;
|
||||
String localDir;
|
||||
String remoteDir;
|
||||
String outputDir;
|
||||
String script;
|
||||
List<String> arguments;
|
||||
String configuration;
|
||||
boolean deletefiles;
|
||||
StatusListener statuslistener;
|
||||
|
||||
private void resetAllVars() {
|
||||
scope = null;
|
||||
gscope = null;
|
||||
|
||||
yetstopped = false;
|
||||
messagesresent = false;
|
||||
status = 0;
|
||||
abort = false;
|
||||
shutdown = false;
|
||||
|
||||
eprs = null;
|
||||
activeNodes = 0;
|
||||
computingNodes = 0;
|
||||
numberOfMessages = 0;
|
||||
|
||||
actualNumberOfNodes = 0;
|
||||
filenames = null;
|
||||
fileurls = null;
|
||||
|
||||
queueName = null;
|
||||
queueResponse = null;
|
||||
queueURL = null;
|
||||
queueUSER = null;
|
||||
queuePWD = null;
|
||||
consumer = null;
|
||||
producer = null;
|
||||
broadcastTimer = null;
|
||||
computationWatcherTimer = null;
|
||||
computationWatcher = null;
|
||||
serviceClass = null;
|
||||
serviceName = null;
|
||||
owner = null;
|
||||
localDir = null;
|
||||
remoteDir = null;
|
||||
outputDir = null;
|
||||
script = null;
|
||||
arguments = null;
|
||||
configuration = null;
|
||||
deletefiles = false;
|
||||
statuslistener = null;
|
||||
}
|
||||
|
||||
public int getActiveNodes() {
|
||||
return computingNodes;
|
||||
}
|
||||
|
||||
public float getStatus() {
|
||||
float innerStatus = 0;
|
||||
if (totalNumberOfMessages != 0)
|
||||
innerStatus = (1f - ((float) numberOfMessages / (float) totalNumberOfMessages));
|
||||
if (totalNumberOfStages == 0)
|
||||
return innerStatus;
|
||||
else {
|
||||
float offset = ((float) Math.max(currentNumberOfStages - 1, 0)) / (float) totalNumberOfStages;
|
||||
float status = offset + (innerStatus / (float) totalNumberOfStages);
|
||||
// AnalysisLogger.getLogger().info("stages: "+totalNumberOfStages+" inner status: "+innerStatus+" currentStage: "+currentNumberOfStages+" status: "+status);
|
||||
return status;
|
||||
}
|
||||
}
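// Worked example for getStatus() above (illustrative values): with totalNumberOfStages = 4,
// currentNumberOfStages = 2, totalNumberOfMessages = 10 and numberOfMessages = 5 still pending,
// innerStatus = 0.5, the offset of the completed stages is 1/4 = 0.25, and the overall status is
// 0.25 + 0.5/4 = 0.375 (a fraction in [0,1]; callers may scale it to a percentage).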
|
||||
|
||||
// there is only one node from the client point of view
|
||||
public int getNumberOfNodes() {
|
||||
if (eprs.size() > 0)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
public void setNumberOfNodes(int newNumberOfNodes) {
|
||||
// ignore this setting in this case
|
||||
}
|
||||
|
||||
private void init(String scope, int numberOfNodes) throws Exception {
|
||||
resetAllVars();
|
||||
// init scope variables
|
||||
this.scope = scope;
|
||||
gscope = GCUBEScope.getScope(scope);
|
||||
// introduce a session
|
||||
// initialize flags
|
||||
shutdown = false;
|
||||
yetstopped = false;
|
||||
messagesresent = false;
|
||||
abort = false;
|
||||
// find all the nodes - initialize the eprs
|
||||
findNodes(scope);
|
||||
}
|
||||
|
||||
public QueueJobManager(String scope, int numberOfNodes) throws Exception {
|
||||
init(scope, numberOfNodes);
|
||||
}
|
||||
|
||||
public QueueJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
|
||||
init(scope, numberOfNodes);
|
||||
this.eprs = eprs;
|
||||
}
|
||||
|
||||
private void setGlobalVars(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles) {
|
||||
this.serviceClass = serviceClass;
|
||||
this.serviceName = serviceName;
|
||||
this.owner = owner;
|
||||
this.localDir = localDir;
|
||||
this.remoteDir = remoteDir;
|
||||
this.outputDir = outputDir;
|
||||
this.script = script;
|
||||
this.arguments = arguments;
|
||||
this.configuration = configuration;
|
||||
this.deletefiles = deletefiles;
|
||||
}
|
||||
|
||||
private int totalmessages = 0;
|
||||
|
||||
public boolean uploadAndExecuteChunkized(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles, boolean forceUpload) throws Exception {
|
||||
long t0 = System.currentTimeMillis();
|
||||
|
||||
int elements = arguments.size();
|
||||
/*
|
||||
* int div = elements / (maxNumberOfMessages); int rest = elements % (maxNumberOfMessages); if (rest > 0) div++; if (div == 0) { div = 1; }
|
||||
*/
|
||||
session = (("" + UUID.randomUUID()).replace("-", "") + Math.random()).replace(".", "");
|
||||
int[] chunkSizes = null;
|
||||
// no staging is applied while the number of elements does not exceed maxNumberOfStages
|
||||
if (elements>maxNumberOfStages)
|
||||
chunkSizes = Operations.takeChunks(elements, maxNumberOfStages);
|
||||
else {
|
||||
chunkSizes = new int[1];
|
||||
chunkSizes[0]=elements;
|
||||
}
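// Note (derived from the fields above, not an extra behaviour): since maxNumberOfStages is currently
// Integer.MAX_VALUE, the else branch is effectively always taken and all argument strings are processed
// in a single stage; lowering maxNumberOfStages (e.g. to the commented value 10) would split them into
// up to that many stages via Operations.takeChunks.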
|
||||
int allchunks = chunkSizes.length;
|
||||
totalNumberOfStages = allchunks;
|
||||
currentNumberOfStages = 0;
|
||||
int start = 0;
|
||||
totalmessages = 0;
|
||||
AnalysisLogger.getLogger().info("Starting the computation in "+allchunks+" stages");
|
||||
for (int i = 0; i < allchunks; i++) {
|
||||
numberOfMessages = totalNumberOfMessages = 0;
|
||||
currentNumberOfStages++;
|
||||
int end = Math.min(elements, start + chunkSizes[i]);
|
||||
AnalysisLogger.getLogger().info("Computing the chunk number " + (i + 1) + " of " + allchunks + " between " + start + " and " + (end - 1));
|
||||
List<String> sublist = new ArrayList<String>();
|
||||
for (int j = start; j < end; j++)
|
||||
sublist.add(arguments.get(j));
|
||||
|
||||
AnalysisLogger.getLogger().info("size sub:" + sublist.size());
|
||||
// totalmessages=totalmessages+sublist.size();
|
||||
uploadAndExecute(serviceClass, serviceName, owner, localDir, remoteDir, outputDir, script, sublist, configuration, deletefiles, forceUpload);
|
||||
if (abort)
|
||||
break;
|
||||
start = end;
|
||||
AnalysisLogger.getLogger().info("Processed chunk number " + (i + 1));
|
||||
|
||||
}
|
||||
|
||||
currentNumberOfStages = totalNumberOfStages;
|
||||
AnalysisLogger.getLogger().info("Finished computation on all chunks and messages " + totalmessages);
|
||||
AnalysisLogger.getLogger().info("Whole Procedure done in " + (System.currentTimeMillis() - t0) + " ms");
|
||||
return (!abort);
|
||||
}
|
||||
|
||||
private boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, String configuration, boolean deletefiles, boolean forceUpload) throws Exception {
|
||||
int numberOfRetries = maxNumberOfComputationRetries;
|
||||
boolean recompute = true;
|
||||
|
||||
while ((numberOfRetries > 0) && (recompute)) {
|
||||
long t0 = System.currentTimeMillis();
|
||||
// if (numberOfRetries<maxNumberOfComputationRetries)
|
||||
init(scope, 1);
|
||||
|
||||
AnalysisLogger.getLogger().info("Computation Try number " + (maxNumberOfComputationRetries + 1 - numberOfRetries));
|
||||
|
||||
AnalysisLogger.getLogger().info("Contacting " + actualNumberOfNodes + " Nodes");
|
||||
// set globals
|
||||
setGlobalVars(serviceClass, serviceName, owner, localDir, remoteDir, outputDir, script, arguments, configuration, deletefiles);
|
||||
// if not yet uploaded , upload required files
|
||||
uploadFilesOnStorage(forceUpload);
|
||||
|
||||
// initializing queue
|
||||
setQueueVariables();
|
||||
// broadcast a message to all executors for purging previous queues
|
||||
// purgeQueues();
|
||||
createClientProducer();
|
||||
broadcastListenCommandToExecutorNodes();
|
||||
|
||||
maxFailureTries = activeNodes * 1;
|
||||
|
||||
broadcastTimer = new Timer();
|
||||
broadcastTimer.schedule(new Broadcaster(), broadcastTimePeriod, broadcastTimePeriod);
|
||||
|
||||
computationWatcherTimer = new Timer();
|
||||
computationWatcher = new ComputationTimerWatcher(maxSilenceTimeBeforeComputationStop);
|
||||
computationWatcherTimer.schedule(computationWatcher, computationWatcherTimerPeriod, computationWatcherTimerPeriod);
|
||||
|
||||
// send all messages
|
||||
sendMessages();
|
||||
createClientConsumer();
|
||||
// wait for messages
|
||||
waitForMessages();
|
||||
|
||||
AnalysisLogger.getLogger().info("Wait for message finished - checking result");
|
||||
if (numberOfMessages == 0) {
|
||||
AnalysisLogger.getLogger().info("All tasks have correctly finished!");
|
||||
}
|
||||
|
||||
/*
|
||||
* else{ AnalysisLogger.getLogger().info("Timeout - Warning Some Task is missing!"); for (int k=0;k<finishedChunks.length;k++){ if (finishedChunks[k]==0){ AnalysisLogger.getLogger().info("Sending Again message number " + k); Map<String, Object> inputs = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(k), k, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles); producer.sendMessage(inputs, 0); AnalysisLogger.getLogger().info("Sent Message " + k); } } waitForMessages(); if (numberOfMessages>0){ abort = true; } }
|
||||
*/
|
||||
|
||||
// deleteRemoteFolder();
|
||||
// summary
|
||||
AnalysisLogger.getLogger().info("-SUMMARY-");
|
||||
for (int i = 0; i < totalNumberOfMessages; i++) {
|
||||
if (activeMessages[i])
|
||||
AnalysisLogger.getLogger().info("Error : the Message Number " + i + " Was Never Processed!");
|
||||
if (resentMessages[i] > 0) {
|
||||
messagesresent = true;
|
||||
AnalysisLogger.getLogger().info("Warning : the Message Number " + i + " Was resent " + resentMessages[i] + " Times");
|
||||
}
|
||||
}
|
||||
AnalysisLogger.getLogger().info("-SUMMARY END-");
|
||||
|
||||
stop();
|
||||
AnalysisLogger.getLogger().info("Stopped");
|
||||
AnalysisLogger.getLogger().info("Single Step Procedure done in " + (System.currentTimeMillis() - t0) + " ms");
|
||||
activeNodes = 0;
|
||||
numberOfRetries--;
|
||||
if (abort) {
|
||||
recompute = true;
|
||||
if (numberOfRetries > 0)
|
||||
Thread.sleep(10000);
|
||||
} else
|
||||
recompute = false;
|
||||
}
|
||||
|
||||
return (!abort);
|
||||
}
|
||||
|
||||
public boolean hasResentMessages() {
|
||||
return messagesresent;
|
||||
}
|
||||
|
||||
public void waitForMessages() throws Exception {
|
||||
AnalysisLogger.getLogger().info("Waiting...");
|
||||
while ((numberOfMessages > 0) && (!abort)) {
|
||||
Thread.sleep(2000);
|
||||
|
||||
// long tcurrent = System.currentTimeMillis();
|
||||
// if ((tcurrent - waitTime) > maxwaitingTime) {
|
||||
// break;
|
||||
// }
|
||||
}
|
||||
AnalysisLogger.getLogger().info("...Stop - Abort?" + abort);
|
||||
}
|
||||
|
||||
public boolean wasAborted() {
|
||||
return abort;
|
||||
}
|
||||
|
||||
public void purgeQueues() throws Exception {
|
||||
AnalysisLogger.getLogger().info("Purging Queue");
|
||||
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
|
||||
for (int j = 0; j < actualNumberOfNodes; j++) {
|
||||
try {
|
||||
contactNodes(tasksProxies, j, queueName, queueUSER, queuePWD, queueURL, queueResponse, session, "true");
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().info("Error in purgin queue on node " + j);
|
||||
}
|
||||
}
|
||||
AnalysisLogger.getLogger().info("Queue Purged");
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
try {
|
||||
if (!yetstopped) {
|
||||
if (broadcastTimer != null) {
|
||||
AnalysisLogger.getLogger().info("Stopping Broadcaster");
|
||||
broadcastTimer.cancel();
|
||||
broadcastTimer.purge();
|
||||
}
|
||||
|
||||
if (computationWatcherTimer != null) {
|
||||
AnalysisLogger.getLogger().info("Stopping Watcher");
|
||||
computationWatcherTimer.cancel();
|
||||
computationWatcherTimer.purge();
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().info("Purging Status Listener");
|
||||
|
||||
if (statuslistener != null)
|
||||
statuslistener.destroyAllWatchers();
|
||||
|
||||
AnalysisLogger.getLogger().info("Stopping Producer and Consumer");
|
||||
|
||||
try{
|
||||
producer.stop();
|
||||
producer.closeSession();
|
||||
}catch(Exception e1){}
|
||||
try{
|
||||
consumer.stop();
|
||||
consumer.closeSession();
|
||||
}catch(Exception e2){}
|
||||
|
||||
AnalysisLogger.getLogger().info("Purging Remote Queues");
|
||||
purgeQueues();
|
||||
|
||||
yetstopped = true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().info("Not completely stopped");
|
||||
}
|
||||
}
|
||||
|
||||
private void contactNodes(List<WorkerWatcher> tasksProxies, int order, String queueName, String queueUSER, String queuePWD, String queueURL, String queueResponse, String session, String purgeQueue) throws Exception {
|
||||
// generate the input map according to the arguments
|
||||
Map<String, Object> inputs = generateWorkerInput(queueName, queueUSER, queuePWD, queueURL, queueResponse, session, purgeQueue);
|
||||
AnalysisLogger.getLogger().info("Inputs " + inputs);
|
||||
// take the i-th endpoint of the executor
|
||||
EndpointReferenceType selectedEPR = eprs.get(order);
|
||||
AnalysisLogger.getLogger().info("Broadcasting to node " + (order + 1) + " on " + selectedEPR.getAddress());
|
||||
// run the executor script
|
||||
ExecutorCall call = new ExecutorCall(pluginName, gscope);
|
||||
call.setEndpointReference(selectedEPR);
|
||||
TaskCall task = null;
|
||||
AnalysisLogger.getLogger().info("EPR:" + selectedEPR);
|
||||
task = call.launch(inputs);
|
||||
// AnalysisLogger.getLogger().info("Task EPR:" + task.getEndpointReference());
|
||||
TaskProxy proxy = task.getProxy();
|
||||
tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));
|
||||
// AnalysisLogger.getLogger().info("Contacting node " + (order + 1) + " OK on " + selectedEPR);
|
||||
|
||||
}
|
||||
|
||||
private int findNodes(String scopeString) throws Exception {
|
||||
AnalysisLogger.getLogger().debug("SCOPE:"+scopeString);
|
||||
GCUBEScope scope = GCUBEScope.getScope(scopeString);
|
||||
ISClient client = GHNContext.getImplementation(ISClient.class);
|
||||
WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
|
||||
wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
|
||||
wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='" + pluginName + "']", pluginName));
|
||||
List<RPDocument> listdoc = client.execute(wsquery, scope);
|
||||
EndpointReferenceType epr = null;
|
||||
eprs = new ArrayList<EndpointReferenceType>();
|
||||
int numberOfEP = 0;
|
||||
for (RPDocument resource : listdoc) {
|
||||
epr = resource.getEndpoint();
|
||||
numberOfEP++;
|
||||
eprs.add(epr);
|
||||
}
|
||||
AnalysisLogger.getLogger().info("Found " + numberOfEP + " endpoints");
|
||||
// get current number of available nodes
|
||||
actualNumberOfNodes = eprs.size();
|
||||
return numberOfEP;
|
||||
}
|
||||
|
||||
private void setQueueVariables() throws Exception {
|
||||
queueName = "D4ScienceJob"; // + session;
|
||||
queueResponse = queueName + "Response"+session;
|
||||
//general scope
|
||||
queueURL = gscope.getServiceMap().getEndpoints(GHNContext.MSGBROKER).iterator().next().getAddress().toString();
|
||||
//tests on ecosystem
|
||||
//TODO: delete this!
|
||||
// queueURL = "tcp://ui.grid.research-infrastructures.eu:6166";
|
||||
// queueURL = "tcp://message-broker.d4science.research-infrastructures.eu:6166";
|
||||
AnalysisLogger.getLogger().info("Queue for the scope: " + queueURL);
|
||||
if (queueURL==null){
|
||||
if (scope.startsWith("/gcube"))
|
||||
queueURL = "tcp://ui.grid.research-infrastructures.eu:6166";
|
||||
else
|
||||
queueURL = "tcp://message-broker.d4science.research-infrastructures.eu:6166";
|
||||
}
|
||||
queueUSER = ActiveMQConnection.DEFAULT_USER;
|
||||
queuePWD = ActiveMQConnection.DEFAULT_PASSWORD;
|
||||
}
|
||||
|
||||
public void deleteRemoteFolder() throws Exception {
|
||||
ScopeProvider.instance.set(scope);
|
||||
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED,MemoryType.VOLATILE).getClient();
|
||||
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
|
||||
AnalysisLogger.getLogger().info("Removing Remote Dir " + remoteDir);
|
||||
client.removeDir().RDir(remoteDir);
|
||||
AnalysisLogger.getLogger().info("Removed");
|
||||
}
|
||||
|
||||
private void uploadFilesOnStorage(boolean forceupload) throws Exception {
|
||||
ScopeProvider.instance.set(scope);
|
||||
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, MemoryType.VOLATILE).getClient();
|
||||
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
|
||||
File dir = new File(localDir);
|
||||
File[] files = dir.listFiles();
|
||||
AnalysisLogger.getLogger().info("Start uploading");
|
||||
filenames = new ArrayList<String>();
|
||||
fileurls = new ArrayList<String>();
|
||||
boolean uploadFiles = forceupload;
|
||||
// if we do not force upload then check whether the folder is already there
|
||||
if (!uploadFiles) {
|
||||
List<StorageObject> remoteObjects = client.showDir().RDir(remoteDir);
|
||||
// only upload files if they are not yet uploaded
|
||||
if (remoteObjects.size() == 0)
|
||||
uploadFiles = true;
|
||||
}
|
||||
if (!uploadFiles)
|
||||
AnalysisLogger.getLogger().info("Unnecessary to Uploading Files");
|
||||
|
||||
AnalysisLogger.getLogger().info("Loading files");
|
||||
for (File sfile : files) {
|
||||
if (sfile.getName().startsWith("."))
|
||||
continue;
|
||||
|
||||
String localf = sfile.getAbsolutePath();
|
||||
String filename = sfile.getName();
|
||||
String remotef = remoteDir + sfile.getName();
|
||||
if (uploadFiles) {
|
||||
client.put(true).LFile(localf).RFile(remotef);
|
||||
AnalysisLogger.getLogger().info("Uploading File "+localf+" as remote file "+remotef);
|
||||
}
|
||||
String url = client.getUrl().RFile(remotef);
|
||||
// AnalysisLogger.getLogger().info("URL obtained: " + url);
|
||||
filenames.add(filename);
|
||||
fileurls.add(url);
|
||||
}
|
||||
AnalysisLogger.getLogger().info("Loading finished");
|
||||
|
||||
}
|
||||
|
||||
private void broadcastListenCommandToExecutorNodes() throws Exception {
|
||||
AnalysisLogger.getLogger().info("Submitting script to Remote Queue " + queueName);
|
||||
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
|
||||
try{
|
||||
findNodes(scope);
|
||||
}catch(Exception e){
|
||||
AnalysisLogger.getLogger().info("Error in Finding nodes - using previous value");
|
||||
}
|
||||
activeNodes = actualNumberOfNodes;
|
||||
// launch the tasks
|
||||
for (int i = 0; i < actualNumberOfNodes; i++) {
|
||||
try {
|
||||
contactNodes(tasksProxies, i, queueName, queueUSER, queuePWD, queueURL, queueResponse, session, "false");
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().info("Error in Contacting nodes");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void createClientProducer() throws Exception {
|
||||
AnalysisLogger.getLogger().info("Creating Message Queue and Producer");
|
||||
// create the Producer
|
||||
QueueManager qm = new QueueManager();
|
||||
qm.createAndConnect(queueUSER, queuePWD, queueURL, queueName);
|
||||
producer = new Producer(qm, queueName);
|
||||
AnalysisLogger.getLogger().info("Producer OK");
|
||||
}
|
||||
|
||||
private void createClientConsumer() throws Exception {
|
||||
AnalysisLogger.getLogger().info("Creating Response Message Queue and Consumer");
|
||||
// create the listener
|
||||
statuslistener = new StatusListener();
|
||||
QueueManager qm1 = new QueueManager();
|
||||
qm1.createAndConnect(queueUSER, queuePWD, queueURL, queueResponse);
|
||||
consumer = new Consumer(qm1, statuslistener, statuslistener, queueResponse);
|
||||
AnalysisLogger.getLogger().info("Consumers OK");
|
||||
}
|
||||
|
||||
boolean activeMessages[];
|
||||
public int resentMessages[];
|
||||
|
||||
private void sendMessages() throws Exception {
|
||||
int i = 0;
|
||||
numberOfMessages = arguments.size();
|
||||
totalNumberOfMessages = numberOfMessages;
|
||||
AnalysisLogger.getLogger().info("Messages To Send " + numberOfMessages);
|
||||
activeMessages = new boolean[numberOfMessages];
|
||||
resentMessages = new int[numberOfMessages];
|
||||
for (String argum : arguments) {
|
||||
Map<String, Object> inputs = generateInputMessage(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, false);
|
||||
producer.sendMessage(inputs, 0);
|
||||
AnalysisLogger.getLogger().info("Send " + i);
|
||||
activeMessages[i] = true;
|
||||
i++;
|
||||
}
|
||||
AnalysisLogger.getLogger().info("Messages Sent " + numberOfMessages);
|
||||
}
|
||||
|
||||
private Map<String, Object> generateInputMessage(Object filenames, Object fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir, String session, String configuration, boolean deletefiles, boolean duplicateMessage) {
|
||||
Map<String, Object> inputs = new HashMap<String, Object>();
|
||||
|
||||
inputs.put(ATTRIBUTE.FILE_NAMES.name(), filenames);
|
||||
inputs.put(ATTRIBUTE.FILE_URLS.name(), fileurls);
|
||||
inputs.put(ATTRIBUTE.OUTPUTDIR.name(), outputDir);
|
||||
inputs.put(ATTRIBUTE.SCRIPT.name(), script);
|
||||
inputs.put(ATTRIBUTE.ARGUMENTS.name(), argum + " " + duplicateMessage);
|
||||
inputs.put(ATTRIBUTE.ORDER.name(), "" + i);
|
||||
inputs.put(ATTRIBUTE.SCOPE.name(), scope);
|
||||
inputs.put(ATTRIBUTE.SERVICE_CLASS.name(), serviceClass);
|
||||
inputs.put(ATTRIBUTE.SERVICE_NAME.name(), serviceName);
|
||||
inputs.put(ATTRIBUTE.OWNER.name(), owner);
|
||||
inputs.put(ATTRIBUTE.REMOTEDIR.name(), remoteDir);
|
||||
inputs.put(ATTRIBUTE.CLEAN_CACHE.name(), "" + deletefiles);
|
||||
inputs.put(ATTRIBUTE.QSESSION.name(), session);
|
||||
inputs.put(ATTRIBUTE.CONFIGURATION.name(), configuration);
|
||||
inputs.put(ATTRIBUTE.TOPIC_RESPONSE_NAME.name(), queueResponse);
|
||||
inputs.put(ATTRIBUTE.QUEUE_USER.name(), queueUSER);
|
||||
inputs.put(ATTRIBUTE.QUEUE_PASSWORD.name(), queuePWD);
|
||||
inputs.put(ATTRIBUTE.QUEUE_URL.name(), queueURL);
|
||||
return inputs;
|
||||
}
|
||||
|
||||
private Map<String, Object> generateWorkerInput(String queueName, String queueUser, String queuePassword, String queueURL, String queueResponse, String session, String purge) {
|
||||
|
||||
Map<String, Object> inputs = new HashMap<String, Object>();
|
||||
|
||||
inputs.put(ATTRIBUTE.TOPIC_NAME.name(), ScriptIOWorker.toInputString(queueName));
|
||||
inputs.put(ATTRIBUTE.QUEUE_USER.name(), ScriptIOWorker.toInputString(queueUser));
|
||||
inputs.put(ATTRIBUTE.QUEUE_PASSWORD.name(), ScriptIOWorker.toInputString(queuePassword));
|
||||
inputs.put(ATTRIBUTE.QUEUE_URL.name(), ScriptIOWorker.toInputString(queueURL));
|
||||
inputs.put(ATTRIBUTE.TOPIC_RESPONSE_NAME.name(), ScriptIOWorker.toInputString(queueResponse));
|
||||
inputs.put(ATTRIBUTE.QSESSION.name(), session);
|
||||
inputs.put(ATTRIBUTE.ERASE.name(), purge);
|
||||
return inputs;
|
||||
}
|
||||
|
||||
public class Broadcaster extends TimerTask {
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
AnalysisLogger.getLogger().info("(((((((((((((((((((((((((((------Broadcasting Information To Watchers------)))))))))))))))))))))))))))");
|
||||
broadcastListenCommandToExecutorNodes();
|
||||
AnalysisLogger.getLogger().info("(((((((((((((((((((((((((((------END Broadcasting Information To Watchers------)))))))))))))))))))))))))))");
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().info("--------------------------------Broadcaster: Error Sending Listen Message to Executors------)))))))))))))))))))))))))))");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public class ComputationTimerWatcher extends TimerTask {
|
||||
|
||||
long maxTime;
|
||||
long lastTimeClock;
|
||||
|
||||
public ComputationTimerWatcher(long maxtime) {
|
||||
this.maxTime = maxtime;
|
||||
this.lastTimeClock = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
lastTimeClock = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public void setmaxTime(long maxTime) {
|
||||
this.maxTime = maxTime;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
long t0 = System.currentTimeMillis();
|
||||
AnalysisLogger.getLogger().info("Computation Watcher Timing Is " + (t0 - lastTimeClock)+" max computation time is "+maxTime);
|
||||
if ((t0 - lastTimeClock) > maxTime) {
|
||||
AnalysisLogger.getLogger().info("Computation Watcher - Computation Timeout: Closing Queue Job Manager!!!");
|
||||
abort();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().info("Error Taking clock");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public synchronized void abort() {
|
||||
AnalysisLogger.getLogger().info("Computation Aborted");
|
||||
this.abort = true;
|
||||
}
|
||||
|
||||
public class StatusListener implements MessageListener, ExceptionListener {
|
||||
|
||||
private QueueWorkerWatcher[] watchers;
|
||||
|
||||
synchronized public void onException(JMSException ex) {
|
||||
abort();
|
||||
AnalysisLogger.getLogger().info("JMS Exception occured. Shutting down client.");
|
||||
}
|
||||
|
||||
private synchronized void addWatcher(int order) {
|
||||
if (watchers == null)
|
||||
watchers = new QueueWorkerWatcher[totalNumberOfMessages];
|
||||
|
||||
QueueWorkerWatcher watcher = watchers[order];
|
||||
if (watcher != null) {
|
||||
destroyWatcher(order);
|
||||
}
|
||||
|
||||
Map<String, Object> message = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(order), order, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, true);
|
||||
watchers[order] = new QueueWorkerWatcher(producer, message, order);
|
||||
}
|
||||
|
||||
private synchronized void resetWatcher(int order) {
|
||||
if (watchers == null)
|
||||
watchers = new QueueWorkerWatcher[totalNumberOfMessages];
|
||||
else if (watchers[order] != null)
|
||||
watchers[order].resetTime();
|
||||
}
|
||||
|
||||
private synchronized void destroyWatcher(int order) {
|
||||
if (watchers != null && watchers[order] != null) {
|
||||
if (watchers[order].hasResent())
|
||||
resentMessages[order] = resentMessages[order] + 1;
|
||||
|
||||
watchers[order].destroy();
|
||||
watchers[order] = null;
|
||||
AnalysisLogger.getLogger().info("Destroyed Watcher number " + order);
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void destroyAllWatchers() {
|
||||
if (watchers != null) {
|
||||
for (int i = 0; i < watchers.length; i++) {
|
||||
destroyWatcher(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void onMessage(Message message) {
|
||||
|
||||
// get message
|
||||
try {
|
||||
|
||||
HashMap<String, Object> details = (HashMap<String, Object>) (HashMap<String, Object>) message.getObjectProperty(ATTRIBUTE.CONTENT.name());
|
||||
String status = (String) details.get(ATTRIBUTE.STATUS.name());
|
||||
String order = "" + details.get(ATTRIBUTE.ORDER.name());
|
||||
String nodeaddress = (String) details.get(ATTRIBUTE.NODE.name());
|
||||
String msession = (String) details.get(ATTRIBUTE.QSESSION.name());
|
||||
Object error = details.get(ATTRIBUTE.ERROR.name());
|
||||
|
||||
AnalysisLogger.getLogger().info("Current session " + session);
|
||||
if ((msession != null) && (msession.equals(session))) {
|
||||
AnalysisLogger.getLogger().info("Session " + session + " is right - acknowledge");
|
||||
message.acknowledge();
|
||||
AnalysisLogger.getLogger().info("Session " + session + " acknowledged");
|
||||
int orderInt = -1;
|
||||
try {
|
||||
orderInt = Integer.parseInt(order);
|
||||
} catch (Exception e3) {
|
||||
e3.printStackTrace();
|
||||
}
|
||||
if (orderInt > -1) {
|
||||
|
||||
// reset the watcher
|
||||
if (computationWatcher!=null)
|
||||
computationWatcher.reset();
|
||||
|
||||
AnalysisLogger.getLogger().info("Task number " + order + " is " + status + " on node " + nodeaddress + " and session " + session);
|
||||
|
||||
if (status.equals(ATTRIBUTE.STARTED.name())) {
|
||||
computingNodes++;
|
||||
addWatcher(orderInt);
|
||||
}
|
||||
if (status.equals(ATTRIBUTE.PROCESSING.name())) {
|
||||
|
||||
resetWatcher(orderInt);
|
||||
} else if (status.equals(ATTRIBUTE.FINISHED.name())) {
|
||||
|
||||
totalmessages++;
|
||||
computingNodes--;
|
||||
destroyWatcher(orderInt);
|
||||
if (numberOfMessages > 0)
|
||||
numberOfMessages--;
|
||||
|
||||
AnalysisLogger.getLogger().info("Remaining " + numberOfMessages + " messages to manage");
|
||||
activeMessages[orderInt] = false;
|
||||
|
||||
} else if (status.equals(ATTRIBUTE.FATAL_ERROR.name())) {
|
||||
if (error != null) {
|
||||
AnalysisLogger.getLogger().info("REPORTED FATAL_ERROR on " + nodeaddress + " : ");
|
||||
AnalysisLogger.getLogger().info(error);
|
||||
}
|
||||
|
||||
computingNodes--;
|
||||
if (maxFailureTries <= 0) {
|
||||
AnalysisLogger.getLogger().info("Too much Failures - Aborting");
|
||||
destroyAllWatchers();
|
||||
abort();
|
||||
} else {
|
||||
AnalysisLogger.getLogger().info("Failure Occurred - Now Resending Message " + orderInt);
|
||||
resentMessages[orderInt] = resentMessages[orderInt] + 1;
|
||||
maxFailureTries--;
|
||||
// resend message
|
||||
Map<String, Object> retrymessage = generateInputMessage(filenames, fileurls, outputDir, script, arguments.get(orderInt), orderInt, scope, serviceClass, serviceName, owner, remoteDir, session, configuration, deletefiles, true);
|
||||
producer.sendMessage(retrymessage, QCONSTANTS.timeToLive);
|
||||
AnalysisLogger.getLogger().info("Failure Occurred - Resent Message " + orderInt);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else
|
||||
AnalysisLogger.getLogger().info("Ignoring message " + order + " with status " + status);
|
||||
} else {
|
||||
AnalysisLogger.getLogger().info("wrong session " + msession + " ignoring message");
|
||||
// consumer.manager.session.recover();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
||||
AnalysisLogger.getLogger().info("Error reading details ", e);
|
||||
AnalysisLogger.getLogger().info("...Aborting Job...");
|
||||
abort();
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@@ -0,0 +1,76 @@
package org.gcube.dataanalysis.executor.job.management;

import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;

import javax.jms.Message;

import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
import org.gcube.dataanalysis.executor.messagequeue.Producer;
import org.gcube.dataanalysis.executor.messagequeue.QCONSTANTS;

// Watchdog for a single queued task: if no status update arrives within maxwaitingTime, the task message is re-sent and the watcher destroys itself.
public class QueueWorkerWatcher {

    protected int maxwaitingTime = 2 * QueueJobManager.queueWatcherMaxwaitingTime;
    private long lastTimeClock;
    Timer watcher;
    Producer producer;
    Map<String, Object> message;
    public boolean resent = false;
    int order;

    public QueueWorkerWatcher(Producer producer, Map<String, Object> message, int order) {
        this.producer = producer;
        this.message = message;
        resent = false;
        this.order = order;

        watcher = new Timer();
        watcher.schedule(new Controller(), 0, QCONSTANTS.refreshStatusTime);
        resetTime();
    }

    public synchronized void resetTime() {
        lastTimeClock = System.currentTimeMillis();
    }

    public synchronized void destroy() {
        if (watcher != null) {
            watcher.cancel();
            watcher.purge();
            watcher = null;
        }
    }

    public boolean hasResent() {
        return resent;
    }

    private class Controller extends TimerTask {

        @Override
        public void run() {
            try {
                long t0 = System.currentTimeMillis();
                AnalysisLogger.getLogger().debug("Watcher " + order + " Timing Is " + (t0 - lastTimeClock) + " max waiting time: " + maxwaitingTime);
                if ((t0 - lastTimeClock) > maxwaitingTime) {
                    AnalysisLogger.getLogger().info("Watcher " + order + " Time Is Over " + (t0 - lastTimeClock));
                    AnalysisLogger.getLogger().info("Watcher " + order + " Re-Sending Message " + message);
                    producer.sendMessage(message, QCONSTANTS.timeToLive);
                    // QueueJobManager.resentMessages[Integer.parseInt("" + message.get(ATTRIBUTE.ORDER.name()))] = QueueJobManager.resentMessages[Integer.parseInt("" + message.get(ATTRIBUTE.ORDER.name()))] + 1;
                    resent = true;
                    AnalysisLogger.getLogger().info("Watcher " + order + " Destroying watcher");
                    destroy();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
}

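A minimal usage sketch for the watcher above (not part of this commit): the helper class, the Producer instance, and the message map are hypothetical placeholders that mirror how the job manager drives a watcher per task.

package org.gcube.dataanalysis.executor.job.management;

import java.util.HashMap;
import java.util.Map;

import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
import org.gcube.dataanalysis.executor.messagequeue.Producer;

// Hypothetical helper, for illustration only.
public class QueueWorkerWatcherUsageSketch {

    // 'producer' is assumed to be already connected to the task queue.
    public static QueueWorkerWatcher watchTask(Producer producer, int order) {
        Map<String, Object> taskMessage = new HashMap<String, Object>();
        taskMessage.put(ATTRIBUTE.ORDER.name(), "" + order);
        // start the watchdog: it re-sends taskMessage if no status update arrives in time
        return new QueueWorkerWatcher(producer, taskMessage, order);
    }

    public static void onStatusUpdate(QueueWorkerWatcher watcher, boolean finished) {
        if (finished)
            watcher.destroy();   // task done: stop the timer
        else
            watcher.resetTime(); // still processing: push the deadline forward
    }
}
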
@@ -0,0 +1,248 @@
package org.gcube.dataanalysis.executor.job.management;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import org.apache.axis.message.addressing.EndpointReferenceType;
import org.gcube.common.core.contexts.GHNContext;
import org.gcube.common.core.informationsystem.client.AtomicCondition;
import org.gcube.common.core.informationsystem.client.ISClient;
import org.gcube.common.core.informationsystem.client.RPDocument;
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
import org.gcube.vremanagement.executor.stubs.TaskCall;
import org.gcube.vremanagement.executor.stubs.TaskProxy;

// Manages a remote job: uploads the input files to the storage, launches the ExecutorScript plugin on the selected Executor nodes and polls the tasks until completion.
public class RemoteJobManager {

    private static String pluginName = "ExecutorScript";
    private int actualNumberOfNodes;
    private GCUBEScope gscope;
    private List<EndpointReferenceType> eprs;
    float status;
    boolean abort;
    boolean shutdown;
    protected int activeNodes;
    String scope;

    public int getActiveNodes() {
        return activeNodes;
    }

    public float getStatus() {
        return status;
    }

    public int getNumberOfNodes() {
        return actualNumberOfNodes;
    }

    public void setNumberOfNodes(int newNumberOfNodes) {
        actualNumberOfNodes = newNumberOfNodes;
    }

    public void init(String scope, int numberOfNodes) throws Exception {
        this.scope = scope;
        gscope = GCUBEScope.getScope(scope);
        AnalysisLogger.getLogger().debug("Using the following scope for this computation: " + gscope);
        shutdown = false;
        yetuploaded = false;
        if (eprs == null)
            actualNumberOfNodes = findNodes(scope);
        else
            actualNumberOfNodes = eprs.size();

        if (numberOfNodes < actualNumberOfNodes)
            actualNumberOfNodes = numberOfNodes;
    }

    public RemoteJobManager(String scope, int numberOfNodes) throws Exception {
        init(scope, numberOfNodes);
    }

    public RemoteJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
        this.eprs = eprs;
        init(scope, numberOfNodes);
    }

    List<String> filenames;
    List<String> fileurls;
    boolean yetuploaded;
    String session;

    public boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, boolean deletefiles) throws Exception {
        boolean executeAll = false;
        long t0 = System.currentTimeMillis();
        // if not yet uploaded, upload the required files
        if (!yetuploaded) {
            ScopeProvider.instance.set(scope);
            IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED).getClient();
            // IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
            File dir = new File(localDir);
            File[] files = dir.listFiles();
            AnalysisLogger.getLogger().debug("Start uploading");
            filenames = new ArrayList<String>();
            fileurls = new ArrayList<String>();
            for (File sfile : files) {
                String localf = sfile.getAbsolutePath();
                String filename = sfile.getName();
                String remotef = remoteDir + sfile.getName();
                client.put(true).LFile(localf).RFile(remotef);
                String url = client.getUrl().RFile(remotef);
                AnalysisLogger.getLogger().debug("URL created: " + url);
                filenames.add(filename);
                fileurls.add(url);
            }
            AnalysisLogger.getLogger().debug("Upload end");
            yetuploaded = true;
            session = ("" + UUID.randomUUID()).replace("-", "");
        }

        // if the number of available nodes is higher than zero, launch the tasks
        if (actualNumberOfNodes > 0) {

            AnalysisLogger.getLogger().debug("Executing script on " + actualNumberOfNodes + " nodes");
            int len = arguments.size();
            List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
            activeNodes = 0;
            // launch the tasks
            for (int i = 0; i < actualNumberOfNodes; i++) {
                String argum = "";
                // supply the arguments if they are available
                if (i < len)
                    argum = arguments.get(i);
                // generate the input map according to the arguments
                Map<String, Object> inputs = generateInput(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir, session, deletefiles);
                AnalysisLogger.getLogger().debug("-> Owner: " + owner + " ServiceClass: " + serviceClass + " ServiceName:" + serviceName + " remoteDir:" + remoteDir);
                // take the i-th endpoint of the executor
                EndpointReferenceType selectedEPR = eprs.get(i);
                AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " on " + selectedEPR);
                // run the executor script
                ExecutorCall call = new ExecutorCall(pluginName, gscope);
                call.setEndpointReference(selectedEPR);
                TaskCall task = null;
                task = call.launch(inputs);
                TaskProxy proxy = task.getProxy();
                // add the task to the watchers list in order to poll it later
                tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));

                AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " OK on " + selectedEPR);
            }

            activeNodes = actualNumberOfNodes;
            AnalysisLogger.getLogger().debug("Launch Finished - Controlling Status");
            int allstatus = 0;
            abort = false;
            // control the execution: go on until there are active nodes or the process must stop
            while ((activeNodes != 0) && (!abort) && (!shutdown)) {
                // for each node get the task state
                int nworkers = tasksProxies.size();
                int i = 0;
                while (i < nworkers) {
                    WorkerWatcher proxy = tasksProxies.get(i);
                    String state = proxy.getState();
                    AnalysisLogger.getLogger().debug("REMOTE JOB MANAGER-> STATE " + state);
                    // control for aborted computation
                    abort = ((state == null) || state.equals("FAILED") || (!state.equals("DONE") && !state.equals("RUNNING")));
                    // control for finished computation
                    boolean finished = false;
                    if (state != null)
                        finished = state.equals("DONE");
                    // if finished, update the active nodes
                    if (finished) {
                        tasksProxies.remove(i);
                        allstatus++;
                        activeNodes--;
                        nworkers--;
                        if (activeNodes == 0)
                            break;
                    }
                    else
                        i++;

                    status = Math.min(((float) allstatus / (float) actualNumberOfNodes) * 100f, 95f);
                    if (abort)
                        break;
                    if (shutdown)
                        break;
                    // AnalysisLogger.getLogger().debug(String.format("Task " + i + "executed started at %Tc with %s state ", proxy.getStartTime(), state));
                    // sleep before polling again
                    Thread.sleep(2000);
                }
            }

            activeNodes = 0;

            AnalysisLogger.getLogger().debug("All Tasks have Finished");
            if (!abort) {
                AnalysisLogger.getLogger().debug("All Tasks were successful");
                /*
                 * List<StorageObject> listElements = client.showDir().RDir(remoteDir); for (StorageObject obj : listElements) { AnalysisLogger.getLogger().debug("obj stored in directory " + remoteDir + ": " + obj.getName()); }
                 */
            } else
                AnalysisLogger.getLogger().debug("Tasks were NOT successful");
        } else
            AnalysisLogger.getLogger().debug("Warning: could not execute tasks: No Nodes Available!");
        AnalysisLogger.getLogger().debug("Whole procedure done in " + (System.currentTimeMillis() - t0) + " ms");
        status = 100f;
        return executeAll;
    }

    public boolean wasAborted() {
        return abort;
    }

    public void stop() {
        shutdown = true;
    }

    private int findNodes(String scopeString) throws Exception {
        GCUBEScope scope = GCUBEScope.getScope(scopeString);
        ISClient client = GHNContext.getImplementation(ISClient.class);
        WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
        wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
        wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='" + pluginName + "']", pluginName));
        List<RPDocument> listdoc = client.execute(wsquery, scope);
        EndpointReferenceType epr = null;
        eprs = new ArrayList<EndpointReferenceType>();
        int numberOfEP = 0;
        for (RPDocument resource : listdoc) {
            epr = resource.getEndpoint();
            numberOfEP++;
            eprs.add(epr);
        }
        AnalysisLogger.getLogger().debug("Found " + numberOfEP + " endpoints");

        return numberOfEP;
    }

    private Map<String, Object> generateInput(Object filenames, Object fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir, String session, boolean deletefiles) {
        Map<String, Object> inputs = new HashMap<String, Object>();
        inputs.put("FILE_NAMES", filenames);
        inputs.put("FILE_URLS", fileurls);
        inputs.put("OUTPUTDIR", ScriptIOWorker.toInputString(outputDir));
        inputs.put("SCRIPT", ScriptIOWorker.toInputString(script));
        inputs.put("ARGUMENTS", ScriptIOWorker.toInputString(argum));
        inputs.put("NODE_IDENTIFIER", "" + i);
        inputs.put("SCOPE", ScriptIOWorker.toInputString(scope));
        inputs.put("SERVICE_CLASS", ScriptIOWorker.toInputString(serviceClass));
        inputs.put("SERVICE_NAME", ScriptIOWorker.toInputString(serviceName));
        inputs.put("OWNER", ScriptIOWorker.toInputString(owner));
        inputs.put("REMOTEDIR", ScriptIOWorker.toInputString(remoteDir));
        inputs.put("CLEAN_CACHE", "" + deletefiles);
        // inputs.put("SESSION", ScriptIO.toInputString(session));
        return inputs;
    }
}

@@ -0,0 +1,37 @@
package org.gcube.dataanalysis.executor.job.management;

import org.apache.log4j.Logger;
import org.gcube.vremanagement.executor.stubs.TaskProxy;

public class WorkerWatcher {
    private static int maxTries = 15;
    private int currentTries;
    private static String runningState = "RUNNING";
    private static String failedState = "FAILED";
    Logger logger;

    TaskProxy proxy;

    public WorkerWatcher(TaskProxy proxy, Logger logger) {
        this.proxy = proxy;
        this.logger = logger;
        currentTries = 1;
    }

    public String getState() {
        String state = "";
        try {
            proxy.synchronize();
            state = proxy.getState();
            return state;
        } catch (Exception e) {
            logger.error("Error in getting state: recover try number " + currentTries, e);
            currentTries++;
            if (currentTries > maxTries) {
                return failedState;
            }
            else
                return runningState;
        }
    }
}

@@ -0,0 +1,34 @@
package org.gcube.dataanalysis.executor.messagequeue;

public enum ATTRIBUTE {
    STATUS,
    DONE,
    STARTED,
    FINISHED,
    PROCESSING,
    FATAL_ERROR,
    TRIVIAL_ERROR,
    ORDER,
    NODE,
    CONTENT,
    QSESSION,
    TOPIC_NAME,
    QUEUE_USER,
    QUEUE_PASSWORD,
    QUEUE_URL,
    TOPIC_RESPONSE_NAME,
    ERASE,
    FILE_NAMES,
    FILE_URLS,
    CONFIGURATION,
    OUTPUTDIR,
    OWNER,
    REMOTEDIR,
    SERVICE_CLASS,
    SERVICE_NAME,
    SCOPE,
    SCRIPT,
    ARGUMENTS,
    CLEAN_CACHE,
    ERROR
}

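A hypothetical example (not part of this commit) of the status-report map a worker could send back through the queue; the keys mirror those read by the job manager's onMessage above, and the values are placeholders.

package org.gcube.dataanalysis.executor.messagequeue;

import java.util.HashMap;
import java.util.Map;

// Illustration only: builds a FINISHED report keyed by ATTRIBUTE names.
public class StatusReportSketch {

    public static Map<String, Object> finishedReport(int order, String nodeAddress, String session) {
        Map<String, Object> report = new HashMap<String, Object>();
        report.put(ATTRIBUTE.STATUS.name(), ATTRIBUTE.FINISHED.name());
        report.put(ATTRIBUTE.ORDER.name(), "" + order);
        report.put(ATTRIBUTE.NODE.name(), nodeAddress);
        report.put(ATTRIBUTE.QSESSION.name(), session);
        return report;
    }
}
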
@@ -0,0 +1,62 @@
package org.gcube.dataanalysis.executor.messagequeue;

import javax.jms.ExceptionListener;
import javax.jms.JMSException;
import javax.jms.MessageConsumer;
import javax.jms.MessageListener;

// Wraps a JMS MessageConsumer built on a QueueManager session and binds the given message/exception callbacks to it.
public class Consumer {
    public QueueManager manager;
    public MessageConsumer consumer;
    private MessageListener consumerCallback;
    private ExceptionListener errorCallback;
    private String topic;

    public Consumer(QueueManager manager, MessageListener consumerCallback, ExceptionListener errorCallback, String topic) throws JMSException, InterruptedException {
        this.manager = manager;
        this.consumerCallback = consumerCallback;
        this.errorCallback = errorCallback;
        this.topic = topic;

        create();
    }

    private void create() throws JMSException, InterruptedException {
        // Topic ConsumerTopic = manager.session.createTopic(topic);

        // create the consumer on the shared destination and register the callbacks
        consumer = manager.session.createConsumer(manager.destination);
        // MessageConsumer consumer = manager.session.createDurableSubscriber(ConsumerTopic, "Consumer." + topic);
        // MessageConsumer consumer = manager.session.createConsumer(ConsumerTopic);
        manager.connection.setExceptionListener(errorCallback);
        consumer.setMessageListener(consumerCallback);
    }

    public void standBy() throws JMSException {
        if (consumer != null)
            consumer.close();
    }

    public void wake() throws Exception {
        this.create();
    }

    public void stop() throws JMSException {
        if (consumer != null) {
            consumer.close();
        }
        // closeSession();
    }

    public void closeSession() throws JMSException {
        try {
            manager.closeSession();
            manager.connection.close();
        } catch (Exception e) {
        }
    }
}

@@ -0,0 +1,70 @@
package org.gcube.dataanalysis.executor.messagequeue;

import java.util.UUID;

import javax.jms.DeliveryMode;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageProducer;
import javax.jms.TextMessage;
import javax.jms.Topic;

public class Producer {

    public MessageProducer producer;
    public QueueManager manager;
    public String topic;
    public String identifier;

    public Producer(QueueManager manager, String topic) throws JMSException {
        this.manager = manager;
        this.topic = topic;
        this.identifier = "" + UUID.randomUUID();
        create();
    }

    private void create() throws JMSException {
        // Topic ProducerTopic = manager.session.createTopic(topic);
        producer = manager.session.createProducer(manager.destination);
        // producer = manager.session.createProducer(ProducerTopic);
        producer.setDeliveryMode(DeliveryMode.PERSISTENT);
    }

    public void sendTextMessage(String text, long timeToLive) throws JMSException {
        TextMessage message = manager.session.createTextMessage(text);
        producer.setTimeToLive(timeToLive);
        producer.send(message);
    }

    public void sendMessage(Object toSend, long timeToLive) throws JMSException {
        Message message = manager.session.createMessage();
        message.setObjectProperty(ATTRIBUTE.CONTENT.name(), toSend);
        producer.setTimeToLive(timeToLive);
        producer.send(message);
    }

    public void standBy() throws JMSException {
        producer.close();
    }

    public void wake() throws Exception {
        this.create();
    }

    public void stop() throws JMSException {
        if (producer != null) {
            producer.close();
        }
        // closeSession();
    }

    public void closeSession() throws JMSException {
        try {
            manager.closeSession();
            manager.connection.close();
        } catch (Exception e) {
        }
    }
}

@@ -0,0 +1,8 @@
package org.gcube.dataanalysis.executor.messagequeue;

public class QCONSTANTS {

    public static int refreshStatusTime = 60000;
    public static int QueueLifeTime = 60000; // 3600000;
    public static long timeToLive = 0;
}

@@ -0,0 +1,77 @@
package org.gcube.dataanalysis.executor.messagequeue;

import java.util.Hashtable;
import java.util.Properties;
import java.util.UUID;

import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.Destination;
import javax.jms.JMSException;
import javax.jms.Session;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;
import javax.naming.Context;
import javax.naming.InitialContext;

import org.apache.activemq.ActiveMQConnectionFactory;
import org.apache.activemq.broker.BrokerService;
import org.apache.activemq.broker.jmx.QueueViewMBean;

// Holds the ActiveMQ connection, session and destination shared by Producer and Consumer.
public class QueueManager {

    public ActiveMQConnectionFactory connectionFactory;
    public Connection connection;
    public Session session;
    public Destination destination;
    boolean transacted = false;
    public String mqurl;
    private String identifier;

    public void createAndConnect(String user, String password, String mqurl, String queueName) throws JMSException {
        this.mqurl = mqurl;
        // generate the client identifier before connecting, so that it can be set on the connection
        this.identifier = "" + UUID.randomUUID();
        connect(user, password, mqurl);
        session = connection.createSession(transacted, Session.CLIENT_ACKNOWLEDGE);
        /*
        Hashtable properties = new Hashtable();
        properties.put(Context.INITIAL_CONTEXT_FACTORY, "org.apache.activemq.jndi.ActiveMQInitialContextFactory");
        properties.put(Context.PROVIDER_URL, mqurl);
        InitialContext context = new InitialContext(properties);
        ConnectionFactory factory = (ConnectionFactory) context.lookup("ConnectionFactory");

        destination = (Destination) context.lookup(queueName);
        */
        // destination = session.createQueue(queueName + "?consumer.prefetchSize=3");
        destination = session.createQueue(queueName + "?wireFormat.maxInactivityDurationInitalDelay=3600000&requestTimeout=240000&wireFormat.maxInactivityDuration=3600000");
    }

    public void destroy() {

    }

    private void connect(String user, String password, String mqurl) throws JMSException {
        connectionFactory = new ActiveMQConnectionFactory(user, password, mqurl);
        connectionFactory.getPrefetchPolicy().setQueuePrefetch(1);

        // Properties p = new Properties();
        // p.put("persistent", "false");
        // p.put("consumer.prefetchSize", "3");
        // p.put("ms.prefetchPolicy.all", "3");
        // p.put("cms.PrefetchPolicy.queuePrefetch", "3");

        // connectionFactory.setProperties(p);
        connection = connectionFactory.createConnection();
        connection.setClientID(identifier);
        connection.start();
    }

    public void closeSession() throws Exception {
        // session.unsubscribe(identifier);
        session.close();
    }
}

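A minimal wiring sketch (not part of this commit) showing how QueueManager, Producer and Consumer fit together; the broker URL, credentials, queue name and payload values are placeholders.

package org.gcube.dataanalysis.executor.messagequeue;

import java.util.HashMap;
import java.util.Map;

import javax.jms.ExceptionListener;
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageListener;

// Illustration only: connects to a broker, sends one payload map and registers a listener.
public class QueueWiringSketch {

    public static void main(String[] args) throws Exception {
        QueueManager manager = new QueueManager();
        manager.createAndConnect("user", "password", "tcp://localhost:61616", "taskQueue");

        Producer producer = new Producer(manager, "taskQueue");

        MessageListener onMessage = new MessageListener() {
            public void onMessage(Message message) {
                // status reports arrive here, as handled by QueueJobManager.onMessage
            }
        };
        ExceptionListener onError = new ExceptionListener() {
            public void onException(JMSException e) {
                e.printStackTrace();
            }
        };
        Consumer consumer = new Consumer(manager, onMessage, onError, "taskQueue");

        // a message carries its payload in the CONTENT property as a map of ATTRIBUTE keys
        Map<String, Object> payload = new HashMap<String, Object>();
        payload.put(ATTRIBUTE.STATUS.name(), ATTRIBUTE.STARTED.name());
        producer.sendMessage(payload, QCONSTANTS.timeToLive);

        producer.stop();
        consumer.stop();
        producer.closeSession();
    }
}
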
@@ -0,0 +1,19 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;

public class AquamapsNative2050Node extends AquamapsNativeNode {

    public AquamapsNative2050Node() {
        super();
        type = "2050";
    }

    public String getName() {
        return "AQUAMAPS_NATIVE_2050";
    }

    public String getDescription() {
        return "Algorithm for Native Range in 2050 by Aquamaps on a single node";
    }

}

@@ -0,0 +1,70 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
import org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative;

public class AquamapsNativeNode extends AquamapsSuitableNode {

    public AquamapsNativeNode() {
        super();
    }

    public String getName() {
        return "AQUAMAPS_NATIVE";
    }

    public String getDescription() {
        return "Algorithm for Native Range by Aquamaps on a single node";
    }

    // writes the distribution model on the DB: input species vector + list of areas vectors to report
    public void singleStepPostprocess(Object species) {
        System.out.println("Aquamaps Algorithm Single Step PostProcess-> Analyzing Species distribution");
        // write info on DB
        Queue<String> rows = new ConcurrentLinkedQueue<String>();
        String speciesID = AquamapsSuitableFunctions.getMainInfoID(species);
        Map<String, Float> csquaresMap = operations.completeDistribution.get(speciesID);

        if (csquaresMap != null) {
            System.out.println("Aquamaps Algorithm Single Step PostProcess-> Getting csquare probabilities");
            // write only processed areas
            for (String singleCsquare : csquaresMap.keySet()) {
                String additionalInformation = operations.getAdditionalInformation(species, operations.processedAreas.get(singleCsquare));
                if (additionalInformation == null)
                    additionalInformation = "";
                else if (additionalInformation.length() > 0)
                    additionalInformation = "," + additionalInformation.trim();

                float prob = 0f;
                try {
                    prob = csquaresMap.get(singleCsquare);
                } catch (Exception e) {
                    System.out.println("Aquamaps Algorithm Single Step PostProcess ->Error in getting probability value at " + speciesID + " , " + singleCsquare);
                }
                if (prob > 0)
                    rows.offer("'" + speciesID + "','" + singleCsquare + "','" + MathFunctions.roundDecimal(prob, 3) + "'" + additionalInformation);
            }
            System.out.println("Aquamaps Algorithm Single Step PostProcess-> Filtering probabilities. Size:" + rows.size());
            Queue<String> newrows = new AquamapsNative().filterProbabilitySet(rows);
            System.out.println("Aquamaps Algorithm Single Step PostProcess-> Filtered probabilities. Size:" + newrows.size());
            System.out.println("Aquamaps Algorithm Single Step PostProcess-> Writing rows on DB");
            List<String> toWrite = new ArrayList<String>();
            for (String row : newrows) {
                toWrite.add(row);
                // System.out.println("Added row: " + row);
            }
            AquamapsSuitableFunctions.writeOnDB(toWrite, currentconfig.getParam("DistributionTable"), dbHibConnection);
            System.out.println("Aquamaps Algorithm Single Step PostProcess-> Rows written on DB");
        }
        else
            System.out.println("Aquamaps Algorithm Single Step PostProcess-> Probability distribution is void");
    }

}

@@ -0,0 +1,19 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;

public class AquamapsSuitable2050Node extends AquamapsSuitableNode {

    public AquamapsSuitable2050Node() {
        super();
        type = "2050";
    }

    public String getName() {
        return "AQUAMAPS_SUITABLE_2050";
    }

    public String getDescription() {
        return "Algorithm for Suitable Range in 2050 by Aquamaps on a single node";
    }

}

@@ -0,0 +1,200 @@
package org.gcube.dataanalysis.executor.nodes.algorithms;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.ObjectInputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsAlgorithmCore;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
import org.hibernate.SessionFactory;

import com.thoughtworks.xstream.XStream;

public class AquamapsSuitableFunctions {

    public static String countAllSpeciesQuery = "select count(*) from %1$s;";
    // public static String countAll = "select count(*) from %1$s;";
    public static String countAll = "EXPLAIN SELECT * FROM %1$s;";
    public static String countCsquareCodeQuery = "select count (*) from %1$s d where oceanarea>0";
    public static String selectAllSpeciesQuery = "select depthmin,meandepth,depthprefmin,pelagic,depthprefmax,depthmax,tempmin,layer,tempprefmin,tempprefmax,tempmax,salinitymin,salinityprefmin,salinityprefmax,salinitymax,primprodmin,primprodprefmin,primprodprefmax,primprodmax,iceconmin,iceconprefmin,iceconprefmax,iceconmax,landdistyn,landdistmin,landdistprefmin,landdistprefmax,landdistmax,nmostlat,smostlat,wmostlong,emostlong,faoareas,speciesid from %1$s order by speciesid limit %2$s offset %3$s;";
    public static String csquareCodeQuery = "select csquarecode,depthmean,depthmax,depthmin, sstanmean,sbtanmean,salinitymean,salinitybmean, primprodmean,iceconann,landdist,oceanarea,centerlat,centerlong,faoaream,eezall,lme from %1$s d where oceanarea>0 order by csquarecode limit %2$s offset %3$s";
    public static String createTableStatement = "CREATE TABLE %1$s ( speciesid character varying, csquarecode character varying, probability real, boundboxyn smallint, faoareayn smallint, faoaream integer, eezall character varying, lme integer) WITH (OIDS=FALSE ) #TABLESPACE#; CREATE INDEX CONCURRENTLY %1$s_idx ON %1$s USING btree (speciesid, csquarecode, faoaream, eezall, lme);";
    public static String metainfo = "boundboxyn, faoareayn, faoaream, eezall, lme";
    public static String selectAllSpeciesObservationQuery = "SELECT speciesid,maxclat,minclat from %1$s;";
    public static String probabilityInsertionStatement = "insert into %1$s (speciesid,csquarecode,probability %ADDEDINFORMATION%) VALUES %2$s";
    public static String deleteDuplicates = "delete from %1$s where speciesid='%2$s'";

    // Default Files
    private static String speciesFile = "species.dat";
    private static String csquaresFile = "csquares.dat";
    private static String maxminlatFile = "maxminlat.dat";
    private static String configFile = "config.dat";

    // file1
    public HashMap<String, List<Object>> allSpeciesObservations;
    // file2
    public List<Object> speciesVectors;
    // file3
    public List<Object> environmentVectors;
    public int numberOfSpecies;
    public int numberOfCells;

    // processing variables
    public AlgorithmConfiguration currentconfig;
    public HashMap<String, String> currentSpeciesBoundingBoxInfo;
    public String currentFAOAreas;
    public AquamapsAlgorithmCore core;
    public String type;
    public HashMap<String, Object> processedAreas;
    public ConcurrentHashMap<String, Map<String, Float>> completeDistribution;

    public AquamapsSuitableFunctions(AquamapsAlgorithmCore core, String type, AlgorithmConfiguration config) {
        this.core = core;
        this.type = type;
        this.currentconfig = config;
    }

    // PROBABILITY CALCULATION
    // calculates probability and takes into account the areas processed by this node
    public float calcProb(Object species, Object area) {
        float prob = (float) core.getSpeciesProb((Object[]) species, (Object[]) area);

        String speciesID = getMainInfoID(species);
        String csquareCode = getGeographicalID(area);
        if (completeDistribution == null)
            completeDistribution = new ConcurrentHashMap<String, Map<String, Float>>();

        Map<String, Float> geoDistrib = completeDistribution.get(speciesID);
        // if the map is null then generate a new map, otherwise update it
        if (geoDistrib == null) {
            geoDistrib = new ConcurrentHashMap<String, Float>();
            completeDistribution.put(speciesID, geoDistrib);
        }

        if (prob > 0.1) {
            // record the overall probability distribution
            geoDistrib.put(csquareCode, prob);
            if (processedAreas == null)
                processedAreas = new HashMap<String, Object>();
            processedAreas.put(csquareCode, area);
        }

        return prob;
    }

    // BOUNDING BOX CALCULATION
    // calculates the bounding box information
    public HashMap<String, Integer> calculateBoundingBox(Object[] csquarecode) {
        HashMap<String, Integer> boundingInfo = core.calculateBoundingBox("" + csquarecode[0], currentSpeciesBoundingBoxInfo.get("$pass_NS"), currentSpeciesBoundingBoxInfo.get("$pass_N"), currentSpeciesBoundingBoxInfo.get("$pass_S"), AquamapsAlgorithmCore.getElement(csquarecode, 12), // centerlat
                AquamapsAlgorithmCore.getElement(csquarecode, 13), // centerlong
                AquamapsAlgorithmCore.getElement(csquarecode, 14), // faoaream
                currentSpeciesBoundingBoxInfo.get("$paramData_NMostLat"), currentSpeciesBoundingBoxInfo.get("$paramData_SMostLat"), currentSpeciesBoundingBoxInfo.get("$paramData_WMostLong"), currentSpeciesBoundingBoxInfo.get("$paramData_EMostLong"), currentFAOAreas, currentSpeciesBoundingBoxInfo.get("$northern_hemisphere_adjusted"), currentSpeciesBoundingBoxInfo.get("$southern_hemisphere_adjusted"));

        return boundingInfo;
    }

    // initializes currentFAOAreas and currentSpeciesBoundingBoxInfo
    public void getBoundingBoxInformation(Object[] speciesInfoRow, Object[] speciesObservations) {
        Object[] row = speciesInfoRow;
        String $paramData_NMostLat = AquamapsAlgorithmCore.getElement(row, 28);
        String $paramData_SMostLat = AquamapsAlgorithmCore.getElement(row, 29);
        String $paramData_WMostLong = AquamapsAlgorithmCore.getElement(row, 30);
        String $paramData_EMostLong = AquamapsAlgorithmCore.getElement(row, 31);
        currentFAOAreas = AquamapsAlgorithmCore.getElement(row, 32);
        // adjust FAO areas
        currentFAOAreas = core.procFAO_2050(currentFAOAreas);
        // get Bounding Box Information
        // System.out.println("TYPE:" + type);
        currentSpeciesBoundingBoxInfo = core.getBoundingBoxInfo($paramData_NMostLat, $paramData_SMostLat, $paramData_WMostLong, $paramData_EMostLong, speciesObservations, type);
        // end of get BoundingBoxInformation
    }

    // DATABASE INTERACTION
    public static void writeOnDB(List<String> buffer, String destinationTable, SessionFactory dbHibConnection) {

        int endIndex = buffer.size();
        if (endIndex > 0) {
            System.out.println("\tWriting Buffer is not empty: " + endIndex);
            String $probabilityInsertionStatement = AquamapsSuitableFunctions.probabilityInsertionStatement.replace("%ADDEDINFORMATION%", "," + metainfo);

            StringBuffer sb = new StringBuffer();
            // System.out.println("writeOnDB()->PROBABILITIES BUFFER SIZE DELETION");
            for (int i = 0; i < endIndex; i++) {
                sb.append("(" + buffer.get(i) + ")");
                if (i < endIndex - 1) {
                    sb.append(",");
                }
            }

            String insertionString = String.format($probabilityInsertionStatement, destinationTable, sb.toString());

            try {
                // System.out.println(insertionString);
                DatabaseFactory.executeSQLUpdate(insertionString, dbHibConnection);
            } catch (Exception e) {
                e.printStackTrace();
            }

        }
        else
            System.out.println("\tWarning : writing buffer is empty!");
        System.out.println("\tWriting on DB FINISHED");
    }

    // FILES MANAGEMENT
    public void dumpAll(String path) throws Exception {
        Transformations.dumpObjectToFile(path + configFile, currentconfig);
        // Transformations.dumpObjectToFile(path + csquaresFile, environmentVectors);
    }

    public void rebuildConfig(String configFile) throws Exception {
        FileInputStream fis = new FileInputStream(new File(configFile));
        currentconfig = (AlgorithmConfiguration) new XStream().fromXML(fis);
        fis.close();
    }

    // when uploaded the files will be local
    public void rebuildAll(int cellOrdinal, int chunksize, int speciesOrdinal, int speciesChunkSize, String pathToFiles) throws Exception {
        // currentconfig = (AlgorithmConfiguration) Transformations.getObjectFromFile(pathToFiles + configFile);

        /*
        try{
            environmentVectors = (List<Object>) Transformations.getObjectFromFile(pathToFiles + csquaresFile);
        }catch(Exception e){
            System.out.println("\tError in retrieving environmental vectors");
        }
        */
    }

    public String getAdditionalInformation(Object species, Object area) {
        Object[] arearray = (Object[]) area;
        HashMap<String, Integer> boundingInfo = calculateBoundingBox(arearray);
        String addedInformation = "'" + boundingInfo.get("$InBox") + "','" + boundingInfo.get("$InFAO") + "','" + arearray[14] + "','" + arearray[15] + "','" + arearray[16] + "'";
        return addedInformation;
    }

    // AUXILIARY FUNCTIONS
    public static String getMainInfoID(Object speciesInfo) {
        String s = "" + ((Object[]) speciesInfo)[33];
        return s;
    }

    public static String getGeographicalID(Object geoInfo) {
        String s = "" + ((Object[]) geoInfo)[0];
        return s;
    }

}

Some files were not shown because too many files have changed in this diff.