Adjustments to the date guesser

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@93886 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Gianpaolo Coro 2014-03-31 10:46:45 +00:00
parent 445b3755b0
commit e10ab6c4f8
3 changed files with 120 additions and 122 deletions

View File

@ -1,150 +1,177 @@
package org.gcube.contentmanagement.graphtools.utils;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
public class DateGuesser {
/**
 * Manual smoke test. Prints the day, month and year obtained from two sample dates and the
 * pattern guessed for a handful of sample strings.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    Calendar c = convertDate("20/2/2010 0:49:00");
    System.out.println("giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR));

    // Each sample is reported once.
    String[] samples = { "20/2/2010 0:49:00", "2009-05-12 15:42:10", "1970", "2009-05-12", "prova", "9/26/2010 1:49:00", "0.99" };
    for (String sample : samples) {
        System.out.println("pattern " + getPattern(sample));
    }

    c = convertDate("2009-05-12");
    System.out.println("giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR));
}
/**
 * Converts a textual date without specifying a language.
 * Forwards to {@link #convertDate(String, String)} with a {@code null} language.
 *
 * @param data the textual date to convert
 * @return the result of the two-argument overload for a {@code null} language
 */
public static Calendar convertDate(String data) {
    final String noLanguage = null;
    return convertDate(data, noLanguage);
}
/**
 * Guesses the date pattern of a textual date without specifying a language.
 * Forwards to {@link #getPattern(String, String)} with a {@code null} language.
 *
 * @param data the textual date to analyse
 * @return the result of the two-argument overload for a {@code null} language
 */
public static String getPattern(String data) {
    final String noLanguage = null;
    return getPattern(data, noLanguage);
}
// private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss","MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss", "h:mm a", "yyyy"};
// Candidate SimpleDateFormat patterns used when the requested language is not "it"
// (inputs are then parsed with Locale.UK). Among the patterns that parse an input, the one
// with the longest pattern string is kept; on equal length the later entry wins.
private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "MM/dd/yyyy HH:mm:ss","dd/MM/yyyy HH:mm:ss","EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss","MM/dd/yy KK:mm a","MM/dd/yy KK:mm:ss a","h:mm a", "yyyy","s"};
// Candidate SimpleDateFormat patterns used when the requested language is "it"
// (inputs are then parsed with Locale.ITALY). The same longest-pattern selection applies.
private static final String[] formatiITA = { "dd\\MM\\yyyy", "dd\\MM\\yy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yy", "dd/MM/yyyy","dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "MM/yy","MM/yyyy","dd.MM.yyyy G 'alle' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh a, zzzz", "K:mm a, z", "dd-MMM-yy", "dd.MM.yyyy.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "dd-MMM-yyyy HH:mm", "h:mm a", "yyyy"};
/**
 * Converts a textual date using the default (non-Italian) settings.
 * Equivalent to calling {@link #convertDate(String, String)} with {@code null} as language.
 *
 * @param data the textual date to convert
 * @return whatever the two-argument overload yields for a {@code null} language
 */
public static Calendar convertDate(String data) {
    final String unspecifiedLanguage = null;
    return convertDate(data, unspecifiedLanguage);
}
/**
 * Guesses the date pattern of a textual date using the default (non-Italian) settings.
 * Equivalent to calling {@link #getPattern(String, String)} with {@code null} as language.
 *
 * @param data the textual date to analyse
 * @return whatever the two-argument overload yields for a {@code null} language
 */
public static String getPattern(String data) {
    final String unspecifiedLanguage = null;
    return getPattern(data, unspecifiedLanguage);
}
// private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss","MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss", "h:mm a", "yyyy"};
// Candidate SimpleDateFormat patterns used when the requested language is not "it"
// (inputs are then parsed with Locale.UK). Among the patterns that parse an input, the one
// with the longest pattern string is kept; on equal length the later entry wins.
private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "MM/yy", "MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "MM/dd/yyyy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy", "MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm", "yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "MM/dd/yy KK:mm a", "MM/dd/yy KK:mm:ss a", "h:mm a", "yyyy", "s" };
// Candidate SimpleDateFormat patterns used when the requested language is "it"
// (inputs are then parsed with Locale.ITALY). The same longest-pattern selection applies.
private static final String[] formatiITA = { "dd\\MM\\yyyy", "dd\\MM\\yy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "MM/yy", "MM/yyyy", "dd.MM.yyyy G 'alle' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh a, zzzz", "K:mm a, z", "dd-MMM-yy", "dd.MM.yyyy.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "dd-MMM-yyyy HH:mm", "h:mm a", "yyyy" };
public static String getPattern(String data, String language) {
//String language = System.getProperty("user.language");
if (isSeconds(data))
return "s";
// String language = System.getProperty("user.language");
Locale l = Locale.UK;
if (language!=null && language.equalsIgnoreCase("it"))
if (language != null && language.equalsIgnoreCase("it"))
l = Locale.ITALY;
Date dat = null;
DateFormat formatter = null;
String[] formati = formats;
if (l.equals(Locale.ITALY))
formati = formatiITA;
formati = formatiITA;
int index = -1;
for (int i = 0; i < formati.length; i++) {
try {
formatter = new SimpleDateFormat(formati[i],l);
formatter = new SimpleDateFormat(formati[i], l);
dat = (Date) formatter.parse(data);
if (index == -1)
index = i;
else if (formati[index].length()<=formati[i].length())
else if (formati[index].length() <= formati[i].length())
index = i;
// System.out.println(formati[i]);
// System.out.println(formati[i]);
} catch (Exception e) {
// e.printStackTrace();
// e.printStackTrace();
}
}
if (dat!=null){
if (dat != null) {
return formati[index];
}
else
} else
return null;
}
/**
 * Tells whether a string should be read as a number of seconds rather than a date.
 * <p>
 * The string counts as seconds when it parses as a double, unless the value is a whole number
 * whose {@code int} rendering is exactly four characters long. Strings that do not parse as a
 * double are never seconds.
 *
 * @param timeString the text to inspect
 * @return {@code true} when the text is treated as seconds, {@code false} otherwise
 */
public static boolean isSeconds(String timeString) {
    try {
        double parsed = Double.parseDouble(timeString);
        int truncated = (int) parsed;
        boolean fourCharWholeNumber = String.valueOf(truncated).length() == 4 && truncated == parsed;
        if (fourCharWholeNumber) {
            return false;
        }
        AnalysisLogger.getLogger().debug("This entry contains seconds indication");
        return true;
    } catch (Exception notSeconds) {
        // Any failure in the steps above means the entry is not treated as seconds.
        return false;
    }
}
public static Calendar convertDate(String data, String language) {
//String language = System.getProperty("user.language");
Locale l = Locale.UK;
if (language!=null && language.equalsIgnoreCase("it"))
l = Locale.ITALY;
Date bestDate = null;
Date dat = null;
DateFormat formatter = null;
Locale l = Locale.UK;
if (language != null && language.equalsIgnoreCase("it"))
l = Locale.ITALY;
String[] formati = formats;
if (l.equals(Locale.ITALY))
formati = formatiITA;
int index = -1;
Date bestDate = null;
for (int i = 0; i < formati.length; i++) {
if (isSeconds(data)) {
formatter = new SimpleDateFormat("s", l);
try {
formatter = new SimpleDateFormat(formati[i],l);
dat = (Date) formatter.parse(data);
if (index==-1){
bestDate = dat;
index = i;
bestDate = dat;
} catch (ParseException e) {
}
} else {
// String language = System.getProperty("user.language");
String[] formati = formats;
if (l.equals(Locale.ITALY))
formati = formatiITA;
int index = -1;
for (int i = 0; i < formati.length; i++) {
try {
formatter = new SimpleDateFormat(formati[i], l);
dat = (Date) formatter.parse(data);
if (index == -1) {
bestDate = dat;
index = i;
} else if (formati[index].length() <= formati[i].length()) {
bestDate = dat;
index = i;
}
// break;
} catch (Exception e) {
// e.printStackTrace();
}
else if (formati[index].length()<=formati[i].length()){
bestDate = dat;
index = i;
}
// break;
} catch (Exception e) {
// e.printStackTrace();
}
}
if (bestDate!=null){
if (bestDate != null) {
Calendar c = Calendar.getInstance();
c.setTime(bestDate);
// System.out.println("data "+data+" giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR));
// System.out.println("data "+data+" giorno " + c.get(Calendar.DAY_OF_MONTH) + " mese " + (c.get(Calendar.MONTH) + 1) + " anno " + c.get(Calendar.YEAR));
return c;
}
else
} else
return null;
}
// Granularity labels returned by granularity(String).
public static final String YEAR = "YEAR";
public static final String MONTH = "MONTH";
public static final String DAY = "DAY";
/**
 * Classifies a text by the finest date granularity it can be parsed with.
 * <p>
 * The text is parsed in turn as {@code MM/dd/yyyy} (giving {@link #DAY}), {@code MM/yyyy} and
 * {@code MM/yy} (both giving {@link #MONTH}); the first successful parse decides. When none
 * succeeds the result is {@link #YEAR}.
 *
 * @param pattern the text handed to {@link SimpleDateFormat#parse(String)}
 * @return {@link #DAY}, {@link #MONTH} or {@link #YEAR}
 */
public static String granularity(String pattern) {
    final String[] attempts = { "MM/dd/yyyy", "MM/yyyy", "MM/yy" };
    final String[] outcomes = { DAY, MONTH, MONTH };
    for (int k = 0; k < attempts.length; k++) {
        try {
            new SimpleDateFormat(attempts[k]).parse(pattern);
            return outcomes[k];
        } catch (Exception unparsable) {
            // Try the next, coarser format.
        }
    }
    return YEAR;
}

View File

@ -2,9 +2,7 @@ package org.gcube.dataanalysis.ecoengine.signals;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@ -127,12 +125,11 @@ public class TimeSeries {
}
// each element in the list is Time,Quantity
public static TimeSeries buildFromSignal(List<Tuple<String>> lines, AlgorithmConfiguration config) {
public static TimeSeries buildFromSignal(List<Tuple<String>> lines, AlgorithmConfiguration config) throws Exception {
TimeSeries ts = new TimeSeries(lines.size(), config);
int counter = 0;
HashMap<String, String> timescache = new HashMap<String, String>();
String timepattern = null;
boolean isSeconds = false;
SimpleDateFormat sdf = null;
for (Tuple<String> line : lines) {
String timel = line.getElements().get(0);
@ -140,48 +137,24 @@ public class TimeSeries {
Double quantity = Double.parseDouble(line.getElements().get(1));
Date time = null;
/*
* try { SimpleDateFormat sdf = new SimpleDateFormat("E MMM dd HH:mm:ss zzz yyyy", Locale.ENGLISH); time = (Date) sdf.parse(timel); } catch (Exception e) { } try { SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy HH:mm", Locale.ROOT); time = (Date) sdf.parse(timel); } catch (Exception e) { }
*
* if (time == null)
*/
// time = DateGuesser.convertDate(timel).getTime();
try {
if (counter == 0) {
timepattern = DateGuesser.getPattern(timel);
AnalysisLogger.getLogger().debug("Time pattern: " + timepattern);
//distinguish between years and seconds
double seconds =-1;
try{
seconds = Double.parseDouble(timel);
int secondsint = (int) seconds;
if (((""+secondsint).length()==4) && (secondsint==seconds))
isSeconds = false;
else{
AnalysisLogger.getLogger().debug("Getting seconds instead of pattern!: " + timel);
isSeconds = true;
timepattern="s";
}
}catch(Exception e1){}
}
SimpleDateFormat sdf = new SimpleDateFormat(timepattern, Locale.ENGLISH);
time = (Date) sdf.parse(timel);
} catch (Exception e) {
if (counter == 0) {
timepattern = DateGuesser.getPattern(timel);
AnalysisLogger.getLogger().debug("Time pattern: " + timepattern);
sdf = new SimpleDateFormat(timepattern, Locale.ENGLISH);
}
try{
time = (Date) sdf.parse(timel);
}catch(Exception e){
AnalysisLogger.getLogger().debug("Error in parsing...adjusting "+timel);
time = DateGuesser.convertDate(timel).getTime();
AnalysisLogger.getLogger().debug("ERROR in parsing time :"+timel+" adjusting...");
// e.printStackTrace();
AnalysisLogger.getLogger().debug("Error in parsing...adjusting "+timel+" in "+time);
}
if (counter == 0) {
AnalysisLogger.getLogger().debug("Date detection: input " + timel + " output " + time);
}
ts.addElement(quantity, time, timel, counter);
counter++;

View File

@ -7,8 +7,6 @@ import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.interfaces.StandardLocalExternalAlgorithm;
import org.gcube.dataanalysis.ecoengine.signals.PeriodicityDetector;
@ -133,7 +131,7 @@ public class TimeSeriesAnalysis extends StandardLocalExternalAlgorithm {
inputs.add(p);
ColumnType p1 = new ColumnType(timeSeriesTable, valuesColumn, "The column containing the values of the time series", "values", false);
inputs.add(p1);
addDoubleInput(frequencyResolution, "The precision in detecting the period. The lower this number the lower the number of samples used at each step in the Spectrogram. Reducing this, the spectrogram will be finer and sharper, but you should tune it. Too few samples will make the Spectrogram noisy.", "1");
addDoubleInput(frequencyResolution, "The precision in detecting the period. The lower this number the less the number of points in the Spectrogram (higher number of samples used at each step). Reducing this, the spectrogram will be finer and sharper, but you should tune it. Too many samples will make the Spectrogram noisy.", "1");
addEnumerateInput(AggregationFunctions.values(), aggregationFunction, "Function to apply to samples with the same time instant", AggregationFunctions.SUM.name());
}