264 lines
7.3 KiB
Java
264 lines
7.3 KiB
Java
package org.gcube.dataanalysis.ecoengine.utils;
|
|
|
|
import java.io.BufferedWriter;
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.FileWriter;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
import org.gcube.contentmanagement.graphtools.data.BigSamplesTable;
|
|
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
|
|
|
import com.rapidminer.example.ExampleSet;
|
|
import com.rapidminer.example.table.DataRow;
|
|
import com.rapidminer.example.table.ExampleTable;
|
|
//import com.sun.org.apache.regexp.internal.RE;
|
|
import com.thoughtworks.xstream.XStream;
|
|
|
|
public class Transformations {
|
|
|
|
public static ExampleSet matrix2ExampleSet(double[][] sampleVectors) {
|
|
|
|
int m = sampleVectors.length;
|
|
|
|
BigSamplesTable samples = new BigSamplesTable();
|
|
|
|
for (int k = 0; k < m; k++)
|
|
samples.addSampleRow("sample", sampleVectors[k]);
|
|
|
|
return samples.generateExampleSet();
|
|
|
|
}
|
|
|
|
public static double[][] exampleSet2Matrix(ExampleSet set) {
|
|
|
|
int m = set.size();
|
|
ExampleTable table = set.getExampleTable();
|
|
int n = table.getAttributeCount();
|
|
double[][] matrix = new double[m][n - 1];
|
|
for (int i = 0; i < m; i++) {
|
|
DataRow row = table.getDataRow(i);
|
|
for (int j = 0; j < n - 1; j++) {
|
|
if (!table.getAttribute(j).isNominal()) {
|
|
double d = row.get(table.getAttribute(j));
|
|
matrix[i][j] = d;
|
|
}
|
|
}
|
|
}
|
|
|
|
return matrix;
|
|
|
|
}
|
|
|
|
// gets all the columns from a matrix
|
|
public static double[][] traspose(double[][] matrix) {
|
|
int m = matrix.length;
|
|
if (m > 0) {
|
|
int n = matrix[0].length;
|
|
|
|
double columns[][] = new double[n][m];
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
for (int j = 0; j < m; j++)
|
|
columns[i][j] = matrix[j][i];
|
|
}
|
|
|
|
return columns;
|
|
} else
|
|
return null;
|
|
}
|
|
|
|
// gets a column from a matrix
|
|
public static double[] getColumn(int index, double[][] matrix) {
|
|
int colulen = matrix.length;
|
|
double column[] = new double[colulen];
|
|
for (int i = 0; i < colulen; i++) {
|
|
column[i] = matrix[i][index];
|
|
}
|
|
return column;
|
|
}
|
|
|
|
// gets a column from a matrix
|
|
public static void substColumn(double[] column, int index, double[][] matrix) {
|
|
|
|
for (int i = 0; i < matrix.length; i++) {
|
|
matrix[i][index] = column[i];
|
|
}
|
|
|
|
}
|
|
|
|
// merge matrixes: puts the rows of a matrix under another matrix
|
|
public static double[][] mergeMatrixes(double[][] matrix1, double[][] matrix2) {
|
|
|
|
if ((matrix1 == null) || (matrix1.length == 0))
|
|
return matrix2;
|
|
else if ((matrix2 == null) || (matrix2.length == 0))
|
|
return matrix1;
|
|
else {
|
|
int len1 = matrix1.length;
|
|
int len2 = matrix2.length;
|
|
int superlen = len1 + len2;
|
|
double[][] supermatrix = new double[superlen][];
|
|
for (int i = 0; i < len1; i++) {
|
|
supermatrix[i] = matrix1[i];
|
|
}
|
|
for (int i = len1; i < superlen; i++) {
|
|
supermatrix[i] = matrix2[i - len1];
|
|
}
|
|
return supermatrix;
|
|
}
|
|
}
|
|
|
|
public static String vector2String(double[] vector) {
|
|
String out = "";
|
|
for (int i = 0; i < vector.length; i++) {
|
|
if (i > 0)
|
|
out = out + "," + vector[i];
|
|
else
|
|
out = "" + vector[i];
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
public static Object getObjectFromFile(String file) throws Exception {
|
|
|
|
/*
|
|
* FileInputStream fis = new FileInputStream(file); ObjectInputStream ois = new ObjectInputStream(fis); return ois.readObject();
|
|
*/
|
|
XStream xstream = new XStream();
|
|
return xstream.fromXML(new FileInputStream(file));
|
|
// return xstream.fromXML(FileTools.loadString(file, "UTF-8"));
|
|
}
|
|
|
|
public static void dumpObjectToFile(String file, Object toWrite) throws Exception {
|
|
|
|
XStream xstream = new XStream();
|
|
BufferedWriter bw = new BufferedWriter(new FileWriter(file));
|
|
bw.write(xstream.toXML(toWrite));
|
|
bw.close();
|
|
/*
|
|
* FileOutputStream fos = new FileOutputStream(file); ObjectOutputStream ois = new ObjectOutputStream(fos); ois.writeObject(toWrite);
|
|
*/
|
|
}
|
|
|
|
public static void dumpConfig(String pathToFile, AlgorithmConfiguration config) throws Exception {
|
|
Transformations.dumpObjectToFile(pathToFile, config);
|
|
}
|
|
|
|
public static AlgorithmConfiguration restoreConfig(String configFile) throws Exception {
|
|
FileInputStream fis = new FileInputStream(new File(configFile));
|
|
AlgorithmConfiguration config = (AlgorithmConfiguration) new XStream().fromXML(fis);
|
|
fis.close();
|
|
return config;
|
|
}
|
|
|
|
public static double indexString(String string) {
|
|
// string = Sha1.SHA1(string);
|
|
StringBuffer sb = new StringBuffer();
|
|
if ((string == null) || (string.length() == 0))
|
|
return -1;
|
|
|
|
int m = string.length();
|
|
for (int i = 0; i < m; i++) {
|
|
sb.append((int) string.charAt(i));
|
|
}
|
|
|
|
double d = Double.MAX_VALUE;
|
|
try {
|
|
d = Double.valueOf(sb.toString());
|
|
} catch (Throwable e) {
|
|
}
|
|
|
|
if (d > Integer.MAX_VALUE)
|
|
return (indexString(string.substring(0, 3)));
|
|
|
|
return d;
|
|
}
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
String in = "h,t,,,,k,";
|
|
in = "Salmo lucidus (Richardson 1836)\",\"Salmo lucidus\",\"Richardson 1836\",\"SIMPLE\",0.6547619047619048,\"ASFIS\",\"APL\",\"Plantae aquaticae\",\"\",\"\",\"PLANTAE AQUATICAE MISCELLANEA\",\"Aquatic plants nei\",\"Plantes aquatiques nca\",\"Plantas acuáticas nep\"";
|
|
|
|
List<String> ll = Transformations.parseCVSString(in, ",");
|
|
|
|
for (String l : ll) {
|
|
System.out.println(l);
|
|
}
|
|
|
|
String s = "un'estate al mare";
|
|
System.out.println("index: " + indexString(s));
|
|
System.out.println("sha1: " + Sha1.SHA1(s));
|
|
String s1 = "un'estate al mari";
|
|
System.out.println("index: " + indexString(s1));
|
|
System.out.println("sha1: " + Sha1.SHA1(s1));
|
|
String s2 = "ciao amico mio come stai oggi? fa caldo o sono io che mi scotto?";
|
|
System.out.println("index: " + indexString(s2));
|
|
System.out.println("sha1: " + Sha1.SHA1(s2));
|
|
}
|
|
|
|
public static List<String> parseCVSString(String row, String delimiter) throws Exception {
|
|
|
|
List<String> elements = new ArrayList<String>();
|
|
String phrase = row;
|
|
int idxdelim = -1;
|
|
boolean quot = false;
|
|
phrase = phrase.trim();
|
|
while ((idxdelim = phrase.indexOf(delimiter)) >= 0) {
|
|
quot = phrase.startsWith("\"");
|
|
if (quot) {
|
|
phrase = phrase.substring(1);
|
|
String quoted = "";
|
|
if (phrase.startsWith("\""))
|
|
phrase = phrase.substring(1);
|
|
else{
|
|
|
|
|
|
//Bug RE
|
|
//
|
|
//RE regexp = new RE("[^\\\\]\"");
|
|
//boolean matching = regexp.match(phrase);
|
|
|
|
//if (matching) {
|
|
// int i0 = regexp.getParenStart(0);
|
|
// quoted = phrase.substring(0, i0 + 1).trim();
|
|
// phrase = phrase.substring(i0 + 2).trim();
|
|
//}
|
|
|
|
//Fix
|
|
Pattern p = Pattern.compile("[^\\\\]\"");
|
|
Matcher m = p.matcher(phrase);
|
|
boolean matching = m.matches();
|
|
|
|
if (matching) {
|
|
int i0 = m.start();
|
|
quoted = phrase.substring(0, i0 + 1).trim();
|
|
phrase = phrase.substring(i0 + 2).trim();
|
|
}
|
|
}
|
|
|
|
if (phrase.startsWith(delimiter))
|
|
phrase = phrase.substring(1);
|
|
|
|
elements.add(quoted);
|
|
|
|
} else {
|
|
elements.add(phrase.substring(0, idxdelim));
|
|
phrase = phrase.substring(idxdelim + 1).trim();
|
|
}
|
|
}
|
|
if (phrase.startsWith("\""))
|
|
phrase = phrase.substring(1);
|
|
|
|
if (phrase.endsWith("\""))
|
|
phrase = phrase.substring(0, phrase.length() - 1);
|
|
|
|
elements.add(phrase);
|
|
|
|
return elements;
|
|
}
|
|
}
|