ecological-engine/src/main/java/org/gcube/contentmanagement/graphtools/core/StatisticsGenerator.java

170 lines
6.3 KiB
Java

package org.gcube.contentmanagement.graphtools.core;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.gcube.contentmanagement.graphtools.abstracts.SamplesTable;
import org.gcube.contentmanagement.graphtools.core.filters.Filter;
import org.gcube.contentmanagement.graphtools.data.GraphSamplesTable;
import org.gcube.contentmanagement.graphtools.data.conversions.GraphConverter2D;
import org.gcube.contentmanagement.graphtools.data.databases.CommonDBExtractor;
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
import org.gcube.portlets.user.timeseries.charts.support.types.GraphData;
import org.gcube.portlets.user.timeseries.charts.support.types.GraphGroups;
import org.gcube.portlets.user.timeseries.charts.support.types.Point;
import org.hibernate.SessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.rapidminer.RapidMiner;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.preprocessing.sampling.AbsoluteSampling;
import com.rapidminer.tools.OperatorService;
import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.io.xml.DomDriver;
/**
 * Extracts sample tables from a database table through a {@link CommonDBExtractor},
 * optionally filters and down-samples them, and packages the resulting point lists
 * into {@link GraphGroups} (or their XML serialization).
 *
 * <p>Typical usage: call one of the {@code init} methods once, register filters with the
 * {@code add...Filter} methods, then call {@link #generateGraphs} or
 * {@link #generateStringGraphs}. The graph generation methods rely on state that is
 * only set up by {@code init}.
 */
public class StatisticsGenerator {

    private static final Logger logger = LoggerFactory.getLogger(StatisticsGenerator.class);

    // Package-private on purpose: kept as in the original so same-package code can still reach them.
    SessionFactory referenceDBSession;
    CommonDBExtractor extractor;

    // NOTE(review): LogFile is not referenced anywhere in this class; kept in case it documents an expected config file.
    private static final String LogFile = "ALog.properties";
    private static final String HibFile = "hibernate.cfg.xml";
    private static final String OperatorsFile = "operators.xml";

    // Initialized eagerly so that addColumnFilter() is safe even before init() has been called.
    private List<Filter> columnFilters = new ArrayList<Filter>();
    private Filter xRangeFilter;
    private Filter yRangeFilter;
    private XStream xStream;

    /**
     * Initializes the generator using only the configuration files found under {@code cfgPath}.
     *
     * @param cfgPath prefix that is concatenated with the configuration file names
     *                (no separator is added, so it should end with one)
     * @throws Exception if the underlying initialization fails
     */
    public void init(String cfgPath) throws Exception {
        init(cfgPath, null);
    }

    /**
     * Returns the database session factory created by {@code init}.
     *
     * @return the session factory, or {@code null} if {@code init} has not been called yet
     */
    public SessionFactory getDBSession() {
        return this.referenceDBSession;
    }

    /**
     * Initializes the database connection, the extractor, the XML serializer and RapidMiner.
     * Any previously registered column filters are discarded.
     *
     * @param cfgPath prefix that is concatenated with {@code hibernate.cfg.xml} and
     *                {@code operators.xml} (no separator is added)
     * @param config  optional configuration passed to the database factory; when {@code null}
     *                only the Hibernate file is used
     * @throws Exception if the database connection or RapidMiner initialization fails
     */
    public void init(String cfgPath, LexicalEngineConfiguration config) throws Exception {
        if (config == null) {
            referenceDBSession = DatabaseFactory.initDBConnection(cfgPath + HibFile);
        } else {
            referenceDBSession = DatabaseFactory.initDBConnection(cfgPath + HibFile, config);
        }
        columnFilters = new ArrayList<Filter>();
        extractor = new CommonDBExtractor(referenceDBSession);
        // The operators file location must be published before RapidMiner.init() is invoked.
        System.setProperty("rapidminer.init.operators", cfgPath + OperatorsFile);
        xStream = new XStream(new DomDriver());
        RapidMiner.init();
        // Logged last so the message is only emitted when every step above has succeeded.
        logger.info("StatisticsGenerator->initialization complete");
    }

    /**
     * Discards all column filters. The X and Y range filters are left untouched.
     */
    public void resetFilters() {
        // A fresh list is created (rather than clearing) so a list handed out earlier is not mutated.
        columnFilters = new ArrayList<Filter>();
    }

    /**
     * Registers a column filter built from a column, a value and an operator.
     *
     * @param column   first argument of the three-argument {@link Filter} constructor
     * @param element  second argument of the three-argument {@link Filter} constructor
     * @param operator third argument of the three-argument {@link Filter} constructor
     */
    public void addColumnFilter(String column, String element, String operator) {
        columnFilters.add(new Filter(column, element, operator));
    }

    /**
     * Registers a column filter built from a column and a value.
     *
     * @param column  first argument of the two-argument {@link Filter} constructor
     * @param element second argument of the two-argument {@link Filter} constructor
     */
    public void addColumnFilter(String column, String element) {
        columnFilters.add(new Filter(column, element));
    }

    /**
     * Sets the X range filter, replacing any previous one. It is applied to the point list
     * of every graph produced by {@link #generateGraphs}.
     *
     * @param xmin stored as the filter's first element
     * @param xmax stored as the filter's second element
     */
    public void addXRangeFilter(String xmin, String xmax) {
        xRangeFilter = new Filter(xmin, xmax);
    }

    /**
     * Sets the Y range filter, replacing any previous one. It is handed to the extractor
     * when the sample tables are read.
     *
     * @param ymin stored as the filter's first element
     * @param ymax stored as the filter's second element
     */
    public void addYRangeFilter(String ymin, String ymax) {
        yRangeFilter = new Filter(ymin, ymax);
    }

    /**
     * Generates one graph per sample table returned by the extractor.
     *
     * <p>For each table the points are optionally restricted to the X range filter,
     * down-sampled with RapidMiner's {@code AbsoluteSampling} operator when they exceed
     * {@code maxElements} rows, reordered, and finally added to the result under the name
     * {@code "Distribution for " + key}. Tables that end up with no points are skipped.
     *
     * @param maxElements     maximum number of rows kept per graph before sampling kicks in
     * @param timeSeriesTable forwarded to the extractor
     * @param xDimension      forwarded to the extractor
     * @param yDimension      forwarded to the extractor
     * @param groupDimension  forwarded to the extractor
     * @param speciesColumn   forwarded to the extractor
     * @param filters         forwarded to the extractor
     * @return the generated graph groups (possibly containing no graphs)
     * @throws Exception if extraction, operator creation or sampling fails
     */
    public GraphGroups generateGraphs(int maxElements, String timeSeriesTable, String xDimension, String yDimension, String groupDimension, String speciesColumn, String... filters) throws Exception {
        // Note the argument order expected by the extractor: x, group, y.
        Map<String, SamplesTable> samplesMap = extractor.getMultiDimTemporalTables(columnFilters, yRangeFilter, timeSeriesTable, xDimension, groupDimension, yDimension, speciesColumn, filters);
        logger.info("StatisticsGenerator-> samplesMap has been generated");
        // Parameterized logging: the (potentially huge) map is only rendered when TRACE is enabled.
        logger.trace("{}", samplesMap);

        // setup Absolute Sampling operator
        AbsoluteSampling sampler = (AbsoluteSampling) OperatorService.createOperator("AbsoluteSampling");
        sampler.setParameter("sample_size", String.valueOf(maxElements));
        // NOTE(review): "-1" presumably disables the operator-local seed - confirm against RapidMiner docs.
        sampler.setParameter("local_random_seed", "-1");

        GraphGroups graphGroups = new GraphGroups();
        // Only used to number the trace messages.
        int graphIndex = 1;

        for (Map.Entry<String, SamplesTable> entry : samplesMap.entrySet()) {
            String key = entry.getKey();
            SamplesTable samplesTable = entry.getValue();

            // transform samples table into a list of points
            List<Point<? extends Number, ? extends Number>> points = GraphConverter2D.transformTable(samplesTable);

            // filter XRange if necessary
            if (xRangeFilter != null) {
                points = GraphConverter2D.filterXRange(points, xRangeFilter.getFirstElement(), xRangeFilter.getSecondElement());
            }

            // setup the graph samples table to perform mining processing
            GraphSamplesTable graphSamples = new GraphSamplesTable(points);

            // if there are too many samples, perform downsampling
            if (graphSamples.getNumOfDataRows() > maxElements) {
                ExampleSet exampleSet = graphSamples.generateExampleSet();
                exampleSet = sampler.apply(exampleSet);
                // rebuild the samples table from the sampled example set
                graphSamples = new GraphSamplesTable();
                graphSamples.generateSampleTable(exampleSet);
                points = graphSamples.getGraph();
                logger.trace("Regeneration\n{}", graphSamples);
            }

            // re-enumerate and reorder the rows after the sampling operations
            points = GraphConverter2D.reorder(points);
            // Parameterized form is null-safe; the original called toString() before the null check below.
            logger.trace("Reordering\n{}", points);

            if (points != null && !points.isEmpty()) {
                // build up the GraphData for visualization
                GraphData graphData = new GraphData(points, true);
                // bounds are computed on the (possibly down-sampled) samples table
                graphSamples.calculateBounds();
                graphData.setMaxY(graphSamples.maxY);
                graphData.setMinY(graphSamples.minY);
                graphGroups.addGraph("Distribution for " + key, graphData);
                logger.trace("StatisticsGenerator-> graphgroup {} generated with key: {}", graphIndex, key);
                graphIndex++;
            }
        }

        logger.info("StatisticsGenerator-> graphgroups have been generated");
        return graphGroups;
    }

    /**
     * Same as {@link #generateGraphs} but returns the result serialized to XML by XStream.
     *
     * @param maxElements     maximum number of rows kept per graph before sampling kicks in
     * @param timeSeriesTable forwarded to the extractor
     * @param xDimension      forwarded to the extractor
     * @param yDimension      forwarded to the extractor
     * @param groupDimension  forwarded to the extractor
     * @param speciesColumn   forwarded to the extractor
     * @param filters         forwarded to the extractor
     * @return the XML representation of the generated graph groups
     * @throws Exception if graph generation fails
     */
    public String generateStringGraphs(int maxElements, String timeSeriesTable, String xDimension, String yDimension, String groupDimension, String speciesColumn, String... filters) throws Exception {
        GraphGroups graphGroups = generateGraphs(maxElements, timeSeriesTable, xDimension, yDimension, groupDimension, speciesColumn, filters);
        return xStream.toXML(graphGroups);
    }
}