Adjustments to the date guesser (DateGuesser)

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@93886 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Gianpaolo Coro 2014-03-31 10:46:45 +00:00
parent 445b3755b0
commit e10ab6c4f8
3 changed files with 120 additions and 122 deletions

View File

@ -1,150 +1,177 @@
package org.gcube.contentmanagement.graphtools.utils; package org.gcube.contentmanagement.graphtools.utils;
import java.text.DateFormat; import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Calendar; import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.Locale; import java.util.Locale;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
public class DateGuesser { public class DateGuesser {
public static void main(String[] args) { public static void main(String[] args) {
Calendar c = convertDate("20/2/2010 0:49:00"); Calendar c = convertDate("20/2/2010 0:49:00");
System.out.println("giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR)); System.out.println("giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR));
System.out.println("pattern "+getPattern("20/2/2010 0:49:00") ); System.out.println("pattern " + getPattern("20/2/2010 0:49:00"));
System.out.println("pattern "+getPattern("2009-05-12 15:42:10") ); System.out.println("pattern " + getPattern("2009-05-12 15:42:10"));
System.out.println("pattern "+getPattern("1970") ); System.out.println("pattern " + getPattern("1970"));
System.out.println("pattern "+getPattern("2009-05-12") ); System.out.println("pattern " + getPattern("2009-05-12"));
System.out.println("pattern "+getPattern("prova") ); System.out.println("pattern " + getPattern("prova"));
System.out.println("pattern "+getPattern("9/26/2010 1:49:00")); System.out.println("pattern " + getPattern("9/26/2010 1:49:00"));
System.out.println("pattern "+getPattern("0.99")); System.out.println("pattern " + getPattern("0.99"));
c = convertDate("2009-05-12"); c = convertDate("2009-05-12");
System.out.println("giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR)); System.out.println("giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR));
// Properties prop = new Properties(System.getProperties()); // Properties prop = new Properties(System.getProperties());
// prop.list(System.out); // prop.list(System.out);
} }
public static Calendar convertDate(String data) {
return convertDate(data,null);
}
public static String getPattern(String data) {
return getPattern(data,null);
}
// private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss","MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss", "h:mm a", "yyyy"};
private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "MM/dd/yyyy HH:mm:ss","dd/MM/yyyy HH:mm:ss","EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss","MM/dd/yy KK:mm a","MM/dd/yy KK:mm:ss a","h:mm a", "yyyy","s"};
private static final String[] formatiITA = { "dd\\MM\\yyyy", "dd\\MM\\yy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yy", "dd/MM/yyyy","dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "MM/yy","MM/yyyy","dd.MM.yyyy G 'alle' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh a, zzzz", "K:mm a, z", "dd-MMM-yy", "dd.MM.yyyy.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "dd-MMM-yyyy HH:mm", "h:mm a", "yyyy"};
/**
 * Parses a textual date without supplying a language hint.
 * <p>
 * Simply forwards to {@link #convertDate(String, String)} with {@code null}
 * as the language argument.
 *
 * @param data the text to interpret as a date
 * @return the result of the two-argument overload for a {@code null} language
 */
public static Calendar convertDate(String data) {
    final String noLanguageHint = null;
    return convertDate(data, noLanguageHint);
}
/**
 * Guesses the date pattern of a text without supplying a language hint.
 * <p>
 * Simply forwards to {@link #getPattern(String, String)} with {@code null}
 * as the language argument.
 *
 * @param data the text whose date pattern should be guessed
 * @return the result of the two-argument overload for a {@code null} language
 */
public static String getPattern(String data) {
    final String noLanguageHint = null;
    return getPattern(data, noLanguageHint);
}
// private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss","MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss", "h:mm a", "yyyy"};
private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "MM/yy", "MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "MM/dd/yyyy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy", "MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm", "yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "MM/dd/yy KK:mm a", "MM/dd/yy KK:mm:ss a", "h:mm a", "yyyy", "s" };
private static final String[] formatiITA = { "dd\\MM\\yyyy", "dd\\MM\\yy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "MM/yy", "MM/yyyy", "dd.MM.yyyy G 'alle' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh a, zzzz", "K:mm a, z", "dd-MMM-yy", "dd.MM.yyyy.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "dd-MMM-yyyy HH:mm", "h:mm a", "yyyy" };
public static String getPattern(String data, String language) { public static String getPattern(String data, String language) {
if (isSeconds(data))
//String language = System.getProperty("user.language"); return "s";
// String language = System.getProperty("user.language");
Locale l = Locale.UK; Locale l = Locale.UK;
if (language!=null && language.equalsIgnoreCase("it")) if (language != null && language.equalsIgnoreCase("it"))
l = Locale.ITALY; l = Locale.ITALY;
Date dat = null; Date dat = null;
DateFormat formatter = null; DateFormat formatter = null;
String[] formati = formats; String[] formati = formats;
if (l.equals(Locale.ITALY)) if (l.equals(Locale.ITALY))
formati = formatiITA; formati = formatiITA;
int index = -1; int index = -1;
for (int i = 0; i < formati.length; i++) { for (int i = 0; i < formati.length; i++) {
try { try {
formatter = new SimpleDateFormat(formati[i],l); formatter = new SimpleDateFormat(formati[i], l);
dat = (Date) formatter.parse(data); dat = (Date) formatter.parse(data);
if (index == -1) if (index == -1)
index = i; index = i;
else if (formati[index].length()<=formati[i].length()) else if (formati[index].length() <= formati[i].length())
index = i; index = i;
// System.out.println(formati[i]); // System.out.println(formati[i]);
} catch (Exception e) { } catch (Exception e) {
// e.printStackTrace(); // e.printStackTrace();
} }
} }
if (dat != null) {
if (dat!=null){
return formati[index]; return formati[index];
} } else
else
return null; return null;
} }
/**
 * Tells whether a time entry is a plain numeric amount of seconds rather
 * than a calendar year.
 * <p>
 * The entry is parsed as a floating point number. A whole number between
 * 1000 and 9999 is considered a four-digit year and therefore NOT seconds;
 * every other finite number is considered a seconds indication.
 * <p>
 * Entries that are {@code null}, not numeric, or not finite ("NaN",
 * "Infinity") are never reported as seconds: the "s" date pattern could not
 * parse them anyway.
 *
 * @param timeString the raw time entry, may be {@code null}
 * @return {@code true} if the entry is a finite number that is not a
 *         four-digit year, {@code false} otherwise
 */
public static boolean isSeconds(String timeString) {
    if (timeString == null) {
        return false;
    }

    final double value;
    try {
        value = Double.parseDouble(timeString);
    } catch (NumberFormatException notNumeric) {
        // Not a number at all: the caller falls back to the date patterns.
        return false;
    }

    // parseDouble accepts "NaN" and "Infinity"; those are not usable as seconds.
    if (Double.isNaN(value) || Double.isInfinite(value)) {
        return false;
    }

    // A year is a whole number with exactly four digits. Checking the numeric
    // range (instead of the printed length) keeps a minus sign from being
    // counted as a digit, e.g. "-999".
    boolean fourDigitYear = value >= 1000 && value <= 9999 && value == Math.floor(value);
    if (fourDigitYear) {
        return false;
    }

    AnalysisLogger.getLogger().debug("This entry contains seconds indication");
    return true;
}
public static Calendar convertDate(String data, String language) { public static Calendar convertDate(String data, String language) {
Date bestDate = null;
//String language = System.getProperty("user.language");
Locale l = Locale.UK;
if (language!=null && language.equalsIgnoreCase("it"))
l = Locale.ITALY;
Date dat = null; Date dat = null;
DateFormat formatter = null; DateFormat formatter = null;
Locale l = Locale.UK;
if (language != null && language.equalsIgnoreCase("it"))
l = Locale.ITALY;
String[] formati = formats; if (isSeconds(data)) {
formatter = new SimpleDateFormat("s", l);
if (l.equals(Locale.ITALY))
formati = formatiITA;
int index = -1;
Date bestDate = null;
for (int i = 0; i < formati.length; i++) {
try { try {
formatter = new SimpleDateFormat(formati[i],l);
dat = (Date) formatter.parse(data); dat = (Date) formatter.parse(data);
if (index==-1){ bestDate = dat;
bestDate = dat; } catch (ParseException e) {
index = i; }
} else {
// String language = System.getProperty("user.language");
String[] formati = formats;
if (l.equals(Locale.ITALY))
formati = formatiITA;
int index = -1;
for (int i = 0; i < formati.length; i++) {
try {
formatter = new SimpleDateFormat(formati[i], l);
dat = (Date) formatter.parse(data);
if (index == -1) {
bestDate = dat;
index = i;
} else if (formati[index].length() <= formati[i].length()) {
bestDate = dat;
index = i;
}
// break;
} catch (Exception e) {
// e.printStackTrace();
} }
else if (formati[index].length()<=formati[i].length()){
bestDate = dat;
index = i;
}
// break;
} catch (Exception e) {
// e.printStackTrace();
} }
} }
if (bestDate != null) {
if (bestDate!=null){
Calendar c = Calendar.getInstance(); Calendar c = Calendar.getInstance();
c.setTime(bestDate); c.setTime(bestDate);
// System.out.println("data "+data+" giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR)); // System.out.println("data "+data+" giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR));
return c; return c;
} } else
else
return null; return null;
} }
public static final String YEAR = "YEAR"; public static final String YEAR = "YEAR";
public static final String MONTH = "MONTH"; public static final String MONTH = "MONTH";
public static final String DAY = "DAY"; public static final String DAY = "DAY";
public static String granularity(String pattern) { public static String granularity(String pattern) {
SimpleDateFormat formatter = null; SimpleDateFormat formatter = null;
try{ try {
formatter = new SimpleDateFormat("MM/dd/yyyy"); formatter = new SimpleDateFormat("MM/dd/yyyy");
formatter.parse(pattern); formatter.parse(pattern);
return DAY; return DAY;
}catch(Exception e){} } catch (Exception e) {
try{ }
try {
formatter = new SimpleDateFormat("MM/yyyy"); formatter = new SimpleDateFormat("MM/yyyy");
formatter.parse(pattern); formatter.parse(pattern);
return MONTH; return MONTH;
}catch(Exception e){} } catch (Exception e) {
try{ }
try {
formatter = new SimpleDateFormat("MM/yy"); formatter = new SimpleDateFormat("MM/yy");
formatter.parse(pattern); formatter.parse(pattern);
return MONTH; return MONTH;
}catch(Exception e){} } catch (Exception e) {
}
return YEAR; return YEAR;
} }

View File

@ -2,9 +2,7 @@ package org.gcube.dataanalysis.ecoengine.signals;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Arrays; import java.util.Arrays;
import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
@ -127,12 +125,11 @@ public class TimeSeries {
} }
// each element in the list is Time,Quantity // each element in the list is Time,Quantity
public static TimeSeries buildFromSignal(List<Tuple<String>> lines, AlgorithmConfiguration config) { public static TimeSeries buildFromSignal(List<Tuple<String>> lines, AlgorithmConfiguration config) throws Exception {
TimeSeries ts = new TimeSeries(lines.size(), config); TimeSeries ts = new TimeSeries(lines.size(), config);
int counter = 0; int counter = 0;
HashMap<String, String> timescache = new HashMap<String, String>();
String timepattern = null; String timepattern = null;
boolean isSeconds = false; SimpleDateFormat sdf = null;
for (Tuple<String> line : lines) { for (Tuple<String> line : lines) {
String timel = line.getElements().get(0); String timel = line.getElements().get(0);
@ -140,48 +137,24 @@ public class TimeSeries {
Double quantity = Double.parseDouble(line.getElements().get(1)); Double quantity = Double.parseDouble(line.getElements().get(1));
Date time = null; Date time = null;
/*
* try { SimpleDateFormat sdf = new SimpleDateFormat("E MMM dd HH:mm:ss zzz yyyy", Locale.ENGLISH); time = (Date) sdf.parse(timel); } catch (Exception e) { } try { SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy HH:mm", Locale.ROOT); time = (Date) sdf.parse(timel); } catch (Exception e) { }
*
* if (time == null)
*/
if (counter == 0) {
timepattern = DateGuesser.getPattern(timel);
// time = DateGuesser.convertDate(timel).getTime(); AnalysisLogger.getLogger().debug("Time pattern: " + timepattern);
sdf = new SimpleDateFormat(timepattern, Locale.ENGLISH);
try { }
if (counter == 0) { try{
timepattern = DateGuesser.getPattern(timel); time = (Date) sdf.parse(timel);
AnalysisLogger.getLogger().debug("Time pattern: " + timepattern); }catch(Exception e){
//distinguish between years and seconds AnalysisLogger.getLogger().debug("Error in parsing...adjusting "+timel);
double seconds =-1;
try{
seconds = Double.parseDouble(timel);
int secondsint = (int) seconds;
if (((""+secondsint).length()==4) && (secondsint==seconds))
isSeconds = false;
else{
AnalysisLogger.getLogger().debug("Getting seconds instead of pattern!: " + timel);
isSeconds = true;
timepattern="s";
}
}catch(Exception e1){}
}
SimpleDateFormat sdf = new SimpleDateFormat(timepattern, Locale.ENGLISH);
time = (Date) sdf.parse(timel);
} catch (Exception e) {
time = DateGuesser.convertDate(timel).getTime(); time = DateGuesser.convertDate(timel).getTime();
AnalysisLogger.getLogger().debug("ERROR in parsing time :"+timel+" adjusting..."); AnalysisLogger.getLogger().debug("Error in parsing...adjusting "+timel+" in "+time);
// e.printStackTrace();
} }
if (counter == 0) { if (counter == 0) {
AnalysisLogger.getLogger().debug("Date detection: input " + timel + " output " + time); AnalysisLogger.getLogger().debug("Date detection: input " + timel + " output " + time);
} }
ts.addElement(quantity, time, timel, counter); ts.addElement(quantity, time, timel, counter);
counter++; counter++;

View File

@ -7,8 +7,6 @@ import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType; import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.interfaces.StandardLocalExternalAlgorithm; import org.gcube.dataanalysis.ecoengine.interfaces.StandardLocalExternalAlgorithm;
import org.gcube.dataanalysis.ecoengine.signals.PeriodicityDetector; import org.gcube.dataanalysis.ecoengine.signals.PeriodicityDetector;
@ -133,7 +131,7 @@ public class TimeSeriesAnalysis extends StandardLocalExternalAlgorithm {
inputs.add(p); inputs.add(p);
ColumnType p1 = new ColumnType(timeSeriesTable, valuesColumn, "The column containing the values of the time series", "values", false); ColumnType p1 = new ColumnType(timeSeriesTable, valuesColumn, "The column containing the values of the time series", "values", false);
inputs.add(p1); inputs.add(p1);
addDoubleInput(frequencyResolution, "The precision in detecting the period. The lower this number the lower the number of samples used at each step in the Spectrogram. Reducing this, the spectrogram will be finer and sharper, but you should tune it. Too few samples will make the Spectrogram noisy.", "1"); addDoubleInput(frequencyResolution, "The precision in detecting the period. The lower this number the less the number of points in the Spectrogram (higher number of samples used at each step). Reducing this, the spectrogram will be finer and sharper, but you should tune it. Too many samples will make the Spectrogram noisy.", "1");
addEnumerateInput(AggregationFunctions.values(), aggregationFunction, "Function to apply to samples with the same time instant", AggregationFunctions.SUM.name()); addEnumerateInput(AggregationFunctions.values(), aggregationFunction, "Function to apply to samples with the same time instant", AggregationFunctions.SUM.name());
} }