1024 lines
26 KiB
Python
1024 lines
26 KiB
Python
import setpath
|
|
import functions
|
|
import math
|
|
from lib import iso8601
|
|
import re
|
|
import datetime
|
|
from fractions import Fraction
|
|
import json
|
|
from fractions import Fraction
|
|
|
|
|
|
# Declares the markup used by the docstrings in this module.
__docformat__ = 'reStructuredText en'
|
|
|
|
class modeop:

    """
    .. function:: modeop(X) -> [ModeOpElements int/str, ModeOpValue int]

    Returns the mode (i.e. the value that occurs most frequently in a data set), along with the modevalue (i.e. the maximum frequency of occurrence).
    When more than one mode is found in a data set (i.e. when more than one value appears with the maximum frequency), all of them are returned.

    For a sample from a continuous distribution, such as [0.935..., 1.211..., 2.430..., 3.668..., 3.874...], the concept of mode is unusable in its raw form,
    since each value will occur precisely once. Following the usual practice, data is discretized by rounding to the closest int value.
    For a textual sample, values are first converted to lowercase. NULL values are ignored.

    When no value occurs more than once (or there is no data at all), a single *None | None* row is returned.

    :Returned multiset schema:
        Columns are automatically named as *ModeOpElements, ModeOpValue*

    .. seealso::

        * :ref:`tutmultiset` functions

    Examples:

    >>> table1('''
    ... 1
    ... 3
    ... 6
    ... 6
    ... 6
    ... 6
    ... 7
    ... 7
    ... 7
    ... 7
    ... 12
    ... 12
    ... 17
    ... ''')
    >>> sql("select modeop(a) from table1")
    ModeOpElements | ModeOpValue
    ----------------------------
    6 | 4
    7 | 4


    >>> table2('''
    ... 1.1235
    ... 1
    ... 5.1
    ... 5.2
    ... 5.3
    ... 5.5
    ... 5.6
    ... 5.7
    ... ''')
    >>> sql("select modeop(a) from table2")
    ModeOpElements | ModeOpValue
    ----------------------------
    5 | 3
    6 | 3

    >>> table3('''
    ... leuteris
    ... maria
    ... marialena
    ... Meili
    ... meili
    ... ''')
    >>> sql("select modeop(a) from table3")
    ModeOpElements | ModeOpValue
    ----------------------------
    meili | 2

    .. doctest::
        :hide:

        >>> sql("delete from table3")
        >>> sql("select modeop(a) from table3")
        ModeOpElements | ModeOpValue
        ----------------------------
        None | None

    """
    registered = True  # Value to define db operator
    multiset = True

    # Text types to lowercase: (str, unicode) on Python 2, str on Python 3.
    _text_types = (str, type(u''))

    def __init__(self):
        self.init = True
        # Maps each normalised element to its number of occurrences.
        self.frequency = {}

    def initargs(self, args):
        """Validate the argument count; called once, on the first row."""
        self.init = False
        if not args:
            raise functions.OperatorError("modeop", "No arguments")
        if len(args) > 1:
            raise functions.OperatorError("modeop", "Wrong number of arguments")

    def step(self, *args):
        """Normalise the incoming value and count it."""
        if self.init:
            self.initargs(args)

        value = args[0]
        if value is None:
            # NULLs carry no value to count (round(None) would raise).
            return
        if isinstance(value, self._text_types):
            # For the case of textual dataset, values are converted to lowercase
            element = value.lower()
        else:
            # For the case of arithmetic dataset, values are rounded and converted to int
            element = int(round(value))
        # Counting here keeps memory proportional to the distinct values only.
        self.frequency[element] = self.frequency.get(element, 0) + 1

    def final(self):
        """Yield the header row, then one (element, modevalue) row per mode."""
        yield ("ModeOpElements", "ModeOpValue")

        # The modevalue is the maximum frequency; 0 stands for "no data".
        modevalue = max(self.frequency.values()) if self.frequency else 0

        # If every value occurs only once (or nothing was seen) there is no mode.
        if modevalue <= 1:
            yield ('None', 'None')
            return

        # Sorted so that the row order does not depend on dict ordering.
        for element in sorted(el for el, count in self.frequency.items()
                              if count == modevalue):
            yield (element, modevalue)
|
|
|
|
|
|
class median:
    """
    .. function:: median(X) -> [median float]

    Returns the median, i.e. the numeric value separating the higher half of a sample, a population, or a probability distribution, from the lower half.
    It is computed by arranging all the observations from lowest value to highest value and picking the middle one.
    If there is an even number of observations, then there is no single middle value, so the mean of the two middle values is obtained.
    Incoming textual and NULL values are simply ignored; numeric zeros are counted like any other number.

    Examples:

    >>> table1('''
    ... 1
    ... 3
    ... 6
    ... 6
    ... 6
    ... 6
    ... 7
    ... 7
    ... 7
    ... 7
    ... 12
    ... 12
    ... 17
    ... ''')
    >>> sql("select median(a) from table1")
    median(a)
    ---------
    7.0

    >>> table2('''
    ... 1
    ... 2
    ... 2
    ... 3
    ... 3
    ... 9
    ... ''')
    >>> sql("select median(a) from table2")
    median(a)
    ---------
    2.5

    >>> table3('''
    ... 1
    ... 2
    ... maria
    ... lala
    ... null
    ... 'None'
    ... 3
    ... 9
    ... ''')
    >>> sql("select median(a) from table3")
    median(a)
    ---------
    2.5

    .. doctest::
        :hide:

        >>> sql("delete from table3")
        >>> sql("select median(a) from table3")
        median(a)
        ---------
        None

    """
    registered = True  # Value to define db operator

    # Text types to ignore: (str, unicode) on Python 2, str on Python 3.
    _text_types = (str, type(u''))

    def __init__(self):
        self.init = True
        self.sample = []
        self.counter = 0

    def initargs(self, args):
        """Validate the argument count; called once, on the first row."""
        self.init = False
        if not args:
            raise functions.OperatorError("median", "No arguments")
        if len(args) > 1:
            raise functions.OperatorError("median", "Wrong number of arguments")

    def step(self, *args):
        """Collect the value as a float unless it is text or NULL."""
        if self.init:
            self.initargs(args)

        value = args[0]
        # Test for None explicitly: a plain truthiness test would also
        # discard the perfectly valid number 0.
        if value is None or isinstance(value, self._text_types):
            return
        self.counter += 1
        self.sample.append(float(value))

    def final(self):
        """Return the median of the collected values, or None when empty."""
        if not self.sample:
            return None
        self.sample.sort()

        # Floor division keeps the index an int on Python 2 and 3 alike.
        midpt = self.counter // 2
        if self.counter % 2:
            # Odd number of elements: a single middle value exists.
            return self.sample[midpt]
        # Even number of elements: average the two middle values.
        return (self.sample[midpt - 1] + self.sample[midpt]) / 2.0
|
|
|
|
|
|
|
|
class variance:
    """
    .. function:: variance(X,[type]) -> [variance float]

    Determine the measure of the spread of the data set about the mean.
    Sample variance is determined by default; population variance can be
    determined by setting the (optional) second argument to values 'true' or 'population'.
    When values 'false' or 'sample' are entered for type, the default sample variance computation is performed.
    Values that cannot be converted to an exact fraction (free text, NULL) are ignored.

    Examples:

    >>> table1('''
    ... 1
    ... 2
    ... 3
    ... 4
    ... 5
    ... 6
    ... 'text is ignored'
    ... 'none'
    ... ''')
    >>> sql("select variance(a) from table1")
    variance(a)
    -----------
    3.5
    >>> sql("select variance(a,'false') from table1")
    variance(a,'false')
    -------------------
    3.5
    >>> sql("select variance(a,'sample') from table1")
    variance(a,'sample')
    --------------------
    3.5
    >>> sql("select variance(a,'True') from table1")
    variance(a,'True')
    ------------------
    2.91666666667
    >>> sql("select variance(a,'Population') from table1")
    variance(a,'Population')
    ------------------------
    2.91666666667

    .. doctest::
        :hide:

        >>> sql("delete from table1")
        >>> sql("select variance(a) from table1")
        variance(a)
        -----------
        None
    """
    registered = True  # Value to define db operator

    def __init__(self):
        self.init = True
        self.population = False
        self.n = 0
        # Running mean and running sum of squared deviations (Welford's
        # online update), kept as exact fractions.
        self.mean = Fraction(0)
        self.M2 = Fraction(0)

    def initargs(self, args):
        """Validate the arguments and read the optional sample/population flag.

        Called once, on the first row.
        """
        self.init = False
        if not args:
            raise functions.OperatorError("variance", "No arguments")
        if len(args) > 2:
            raise functions.OperatorError("variance", "Wrong number of arguments")
        if len(args) == 2:
            kind = args[1].lower()
            if kind in ('false', 'sample'):
                self.population = False
            elif kind in ('true', 'population'):
                self.population = True
            else:
                raise functions.OperatorError(
                    "variance",
                    "Wrong value in second argument\n"
                    "Accepted Values:\n"
                    "----False, false, FALSE, sample---- for Sample Variance\n"
                    "----True, true, TRUE, population---- for Population Variance\n")

    def step(self, *args):
        """Fold one value into the running mean and squared-deviation sum."""
        if self.init:
            self.initargs(args)

        try:
            x = Fraction(args[0])
        except (TypeError, ValueError, OverflowError, ZeroDivisionError):
            # Not representable as a finite fraction (text, NULL, nan/inf,
            # "1/0"): skip the row.
            return
        self.n += 1
        delta = x - self.mean
        self.mean += delta / self.n
        # For the first value delta is multiplied by zero, so no guard is needed.
        self.M2 += delta * (x - self.mean)

    def final(self):
        """Return the variance as a float, or None when no value was accepted."""
        if self.n == 0:
            return None
        if not self.population and self.n > 1:
            # Divide sum of squares by N-1 (sample variance).
            divisor = self.n - 1
        else:
            # Divide sum of squares by N (population variance); this branch
            # is also taken for a single observation, which yields 0.0.
            divisor = self.n
        return float(self.M2 / divisor)
|
|
|
|
|
|
class stdev:
    """
    .. function:: stdev(X,[type]) -> [stdev float]

    Computes the standard deviation of a dataset X, i.e. the square root of its variance.
    The sample standard deviation is returned by default; passing 'true' or 'population'
    as the (optional) second argument selects the population standard deviation instead.
    Passing 'false' or 'sample' explicitly requests the default sample computation.

    Examples:

    >>> table1('''
    ... 3
    ... 7
    ... 7
    ... 19
    ... 'text is ignored'
    ... 'none'
    ... ''')
    >>> sql("select stdev(a) from table1")
    stdev(a)
    -------------
    6.92820323028
    >>> sql("select stdev(a,'population') from table1")
    stdev(a,'population')
    ---------------------
    6.0
    >>> sql("select stdev(a,'true') from table1")
    stdev(a,'true')
    ---------------
    6.0

    .. doctest::
        :hide:

        >>> sql("delete from table1")
        >>> sql("select stdev(a) from table1")
        stdev(a)
        --------
        None

    """

    registered = True  # Value to define db operator

    def __init__(self):
        self.init = True
        self.population = False
        self.n = 0
        self.mean = Fraction(0.0)
        self.M2 = Fraction(0.0)

    def initargs(self, args):
        """Check the argument count and parse the optional type flag."""
        self.init = False
        argcount = len(args)
        if argcount == 0:
            raise functions.OperatorError("sdev", "No arguments")
        if argcount > 2:
            raise functions.OperatorError("sdev", "Wrong number of arguments")
        if argcount == 2:
            flag = args[1].lower()
            if flag in ('false', 'sample'):
                self.population = False
            elif flag in ('true', 'population'):
                self.population = True
            else:
                message = ("Wrong value in second argument" + '\n'
                           + "Accepted Values:" + '\n'
                           + "----False, false, FALSE, sample---- for Sample Standard Deviation" + '\n'
                           + "----True, true, TRUE, population---- for Population Standard Deviation" + '\n')
                raise functions.OperatorError("sdev", message)

    def step(self, *args):
        """Update the running mean and sum of squared deviations with one row."""
        if self.init:
            self.initargs(args)

        # Rows whose value cannot be turned into a Fraction are skipped.
        try:
            value = Fraction(args[0])
        except KeyboardInterrupt:
            raise
        except:
            return

        self.n += 1
        offset = value - self.mean
        self.mean = self.mean + offset / self.n
        if self.n >= 2:
            self.M2 = self.M2 + offset * (value - self.mean)

    def final(self):
        """Return the standard deviation, or None when no row was accepted."""
        if self.n == 0:
            return None
        # N-1 for the sample estimate (needs at least two rows), N otherwise.
        use_sample = self.n > 1 and not self.population
        try:
            if use_sample:
                spread = self.M2 / (self.n - 1)
            else:
                spread = self.M2 / self.n
        except:
            spread = 0.0

        return math.sqrt(spread)
|
|
|
|
|
|
class rangef:

    """
    .. function:: rangef(X) -> [rangef float]

    Computes the numerical range for a dataset X, subtracting the minimum value from the maximum value.
    Textual and NULL data entries are simply ignored; numeric zeros are counted like any other number.

    Examples:

    >>> table1('''
    ... 1
    ... 3
    ... 6
    ... 6
    ... 7
    ... 12
    ... 12
    ... 17
    ... 'text is ignored'
    ... 'None'
    ... ''')
    >>> sql("select rangef(a) from table1")
    rangef(a)
    ---------
    16.0

    .. doctest::
        :hide:

        >>> sql("delete from table1")
        >>> sql("select rangef(a) from table1")
        rangef(a)
        ---------
        None
    """
    registered = True  # Value to define db operator

    # Text types to ignore: (str, unicode) on Python 2, str on Python 3.
    _text_types = (str, type(u''))

    def __init__(self):
        self.init = True
        # Only the running extremes are needed, not the whole sample.
        self.minimum = None
        self.maximum = None

    def initargs(self, args):
        """Validate the argument count; called once, on the first row."""
        self.init = False
        if len(args) != 1:
            raise functions.OperatorError("rangef", "Wrong number of arguments")

    def step(self, *args):
        """Update the running minimum and maximum with one value."""
        # Validate on the first row, as the sibling operators in this module do.
        if self.init:
            self.initargs(args)

        value = args[0]
        # Test for None explicitly: a plain truthiness test would also
        # discard the perfectly valid number 0.
        if value is None or isinstance(value, self._text_types):
            return
        number = float(value)
        if self.minimum is None or number < self.minimum:
            self.minimum = number
        if self.maximum is None or number > self.maximum:
            self.maximum = number

    def final(self):
        """Return max - min as a float, or None when no number was seen."""
        if self.minimum is None:
            return None
        return self.maximum - self.minimum
|
|
|
|
|
|
class amean:
    """
    .. function:: amean(X) -> [amean float]

    Computes the arithmetic mean, i.e. the average, thus providing an alternative choice
    to the traditional *avg* offered by sqlite.
    Textual and NULL values are ignored; numeric zeros are counted like any other number.

    Examples:

    >>> table1('''
    ... 1
    ... 2
    ... 2
    ... 3
    ... 'text is ignored, as well as null values'
    ... 'none'
    ... ''')
    >>> sql("select amean(a) from table1")
    amean(a)
    --------
    2.0

    .. doctest::
        :hide:

        >>> sql("delete from table1")
        >>> sql("select amean(a) from table1")
        amean(a)
        --------
        None

    """
    registered = True  # Value to define db function

    # Text types to ignore: (str, unicode) on Python 2, str on Python 3.
    _text_types = (str, type(u''))

    def __init__(self):
        self.init = True
        self.counter = 0
        self.sum = 0.0

    def initargs(self, args):
        """Validate the argument count; called once, on the first row."""
        self.init = False
        if not args:
            raise functions.OperatorError("amean", "No arguments")
        if len(args) > 1:
            raise functions.OperatorError("amean", "Wrong number of arguments")

    def step(self, *args):
        """Add one value to the running sum and count."""
        if self.init:
            self.initargs(args)

        value = args[0]
        # Test for None explicitly: a truthiness test would skip zeros and
        # thereby shrink the divisor used in final().
        if value is None or isinstance(value, self._text_types):
            return
        self.sum += float(value)
        self.counter += 1

    def final(self):
        """Return sum / count, or None when no number was seen."""
        if self.counter == 0:
            return None
        return self.sum / self.counter
|
|
|
|
|
|
class wamean:
    r"""
    .. function:: wamean(W,X) -> [wamean float]

    Computes the weighted arithmetic mean, i.e. the weighted average.
    First column contains the weights and second column contains the actual data values.
    Rows where either column is textual or NULL are ignored; a data value of zero is counted with its weight.

    .. math::

        wamean_{\mathrm} = \sum_{i=1}^{N} w_i x_i / \sum_{i=1}^{N} w_i


    Examples:

    >>> table1('''
    ... 2 1
    ... 2 2
    ... 1 2
    ... 'text is ignored, as well as null values' 3
    ... 'none' 2
    ... 1 'text is ignored, as well as null values'
    ... 2 'none'
    ... 2 3
    ... ''')
    >>> sql("select wamean(a,b) from table1")
    wamean(a,b)
    -----------
    2.0

    .. doctest::
        :hide:

        >>> sql("delete from table1")
        >>> sql("select wamean(a) from table1")
        wamean(a)
        ---------
        None

    """
    registered = True  # Value to define db operator

    # Text types to ignore: (str, unicode) on Python 2, str on Python 3.
    _text_types = (str, type(u''))

    def __init__(self):
        self.init = True
        self.counter = 0  # running sum of the weights
        self.sum = 0.0    # running sum of weight * value

    def initargs(self, args):
        """Validate the argument count; called once, on the first row."""
        self.init = False
        if len(args) != 2:
            raise functions.OperatorError("wamean", "Wrong number of arguments")

    def step(self, *args):
        """Accumulate weight * value and the weight for one row."""
        if self.init:
            self.initargs(args)

        weight, value = args[0], args[1]
        # Test for None explicitly: a truthiness test would drop rows whose
        # data value is 0 together with their weight.
        for item in (weight, value):
            if item is None or isinstance(item, self._text_types):
                return
        self.sum += weight * value
        self.counter += weight

    def final(self):
        """Return the weighted mean, or None when the total weight is zero."""
        if self.counter == 0:
            return None
        return self.sum / self.counter
|
|
|
|
|
|
class gmean:
    r"""
    .. function:: gmean(X,[m]) -> [gmean float]

    Computes the generalized mean (also known as the power mean or Holder mean),
    which is an abstraction of the *Pythagorean means* including *arithmetic*, *geometric*, and *harmonic* means.

    It is defined for a set of *n* positive real numbers as follows:

    .. math::

        gmean_{\mathrm} = \Big ( {1 \over N} \sum_{i=1}^{N} x_i ^p \Big ) ^{1/p}



    The (optional) second argument stands for the *p* parameter, thus determining the exact mean type:

    - p=2 : *Quadratic mean* (computed for both negative and positive values)

    - p=1 : *Arithmetic mean*

    - p=0 : *Geometric mean* (only for positive real numbers)

    - p=-1: *Harmonic mean* (only for positive real numbers)

    By default, i.e. in absence of second argument, p is set to 0, computing
    the geometric mean.

    Textual and NULL values are ignored. For p<1 every value must be strictly positive,
    otherwise an error is raised.

    Examples:

    >>> table1('''
    ... 6
    ... 50
    ... 9
    ... 1200
    ... 'text is ignored, as well as None values'
    ... 'None'
    ... ''')
    >>> sql("select gmean(a) from table1")
    gmean(a)
    -------------
    42.4264068712

    >>> table2('''
    ... 34
    ... 27
    ... 45
    ... 55
    ... 22
    ... 34
    ... ''')
    >>> sql("select gmean(a,1) from table2")
    gmean(a,1)
    -------------
    36.1666666667
    >>> sql("select gmean(a,0) from table2")
    gmean(a,0)
    -------------
    34.5451100372
    >>> sql("select gmean(a) from table2")
    gmean(a)
    -------------
    34.5451100372
    >>> sql("select gmean(a,-1) from table2")
    gmean(a,-1)
    -------------
    33.0179836512
    >>> sql("select gmean(a,2) from table2")
    gmean(a,2)
    -------------
    37.8043207407



    """
    registered = True  # Value to define db operator

    # Text types to ignore: (str, unicode) on Python 2, str on Python 3.
    _text_types = (str, type(u''))

    def __init__(self):
        self.init = True
        self.counter = 0
        self.sum = 0.0
        self.p = 0.0

    def initargs(self, args):
        """Validate the arguments and read the optional exponent *p*.

        Called once, on the first row.
        """
        self.init = False

        if not args:
            raise functions.OperatorError("gmean", "No arguments")
        if len(args) > 2:
            raise functions.OperatorError("gmean", "Wrong number of arguments")
        if len(args) == 2:
            self.p = args[1]
            if self.p > 2 or self.p < -1:
                raise functions.OperatorError(
                    "\n gmean",
                    "Second argument takes values from -1 to 2\n"
                    "p=2 :quadratic mean (for both negative and positive values)\n"
                    "p=1 :artihmetic mean\n"
                    "p=0 :geometric mean (for positive real numbers)\n"
                    "p=-1:harmonian mean (for positive real numbers)\n")

    def step(self, *args):
        """Accumulate log10(x) for p == 0, or x**p otherwise."""
        if self.init:
            self.initargs(args)

        value = args[0]
        # Test for None explicitly: a truthiness test would also drop zeros,
        # which are legitimate inputs for p >= 1.
        if value is None or isinstance(value, self._text_types):
            return
        # Positive means strictly greater than zero; values in (0, 1) are valid.
        if self.p < 1 and value <= 0:
            raise functions.OperatorError("gmean", "The specified type of mean applies only to positive numbers")
        # The easiest way to think of the geometric mean is that
        # it is the average of the logarithmic values, converted back to a base 10 number.
        if self.p == 0:
            self.sum += math.log10(value)
        else:
            self.sum += value ** self.p
        self.counter += 1

    def final(self):
        """Return the power mean for exponent p, or None when no number was seen."""
        if self.counter == 0:
            return None
        if self.p == 0:
            return 10 ** (self.sum / self.counter)
        return (self.sum / self.counter) ** (1.0 / self.p)
|
|
|
|
# Matches an argument of the form 'now:<text>' and captures <text> as group 'now'.
re_now=re.compile('now:(?P<now>.*)')
|
|
|
|
|
|
class frecency:
    """
    .. function:: frecency(actiondate[,points[,now:date]])

    Returns a float weighted sum assigning to each action *points* or less, depending on the *actiondate* distance to the current date (or *now:date*).
    In detail the action points decrease 30% at distance 10-30 days, 50% at 1-3 months, 70% at 3-6 months and 90% at greater distance. Date parameters should be in ISO8601 format.
    When *points* is not given, each action is worth 100 points.

    .. _iso8601:

     **ISO 8601 format** :

    Year:
      YYYY (eg 1997)
    Year and month:
      YYYY-MM (eg 1997-07)
    Complete date:
      YYYY-MM-DD (eg 1997-07-16)
    Complete date plus hours and minutes:
      YYYY-MM-DD hh:mmTZD (eg 1997-07-16 19:20+01:00)
    Complete date plus hours, minutes and seconds:
      YYYY-MM-DD hh:mm:ssTZD (eg 1997-07-16 19:20:30+01:00)
    Complete date plus hours and minutes:
      YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
    Complete date plus hours, minutes and seconds:
      YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)

    Examples:

    >>> table1('''
    ... 2009-06-01 1
    ... 2009-08-28 2
    ... 2009-09-17 3
    ... ''')
    >>> sql("select frecency(a,'now:2009-09-26 04:38:30') from table1")
    frecency(a,'now:2009-09-26 04:38:30')
    -------------------------------------
    200.0


    """

    registered = True  # Value to define db operator

    def __init__(self):
        self.frecency = 0
        self.initstatic = False
        self.points = None
        self.now = None

    def __decrease(self, offsettimedelta):
        """Return the fraction of the points kept for an action of the given age."""
        # (maximum age in days, fraction kept), checked from youngest to oldest.
        for days, kept in ((10, 1.0), (30, 0.7), (30 * 3, 0.5), (30 * 6, 0.3)):
            if offsettimedelta <= datetime.timedelta(days=days):
                return kept
        return 0.1

    def step(self, *args):
        """Add the age-weighted points of one action date to the running total."""
        if not args:
            raise functions.OperatorError("frecency", "No arguments")
        # The last 2 arguments are static, so they are parsed only the first time.
        if not self.initstatic:
            self.initstatic = True
            self.points = 100.0
            # NOTE(review): datetime.now() is naive; if iso8601.parse_date
            # returns timezone-aware values the subtraction below would
            # raise - confirm against the iso8601 helper.
            self.now = datetime.datetime.now()
            for arg in args[1:]:
                try:
                    now_match = re_now.match(arg)
                except TypeError:
                    # A numeric argument (e.g. frecency(a, 50)) cannot be
                    # matched against the pattern; it can only be the points.
                    now_match = None
                if now_match:
                    self.now = iso8601.parse_date(now_match.group('now'))
                else:
                    self.points = int(arg)

        actiondate = iso8601.parse_date(args[0])
        self.frecency += self.__decrease(self.now - actiondate) * self.points

    def final(self):
        """Return the accumulated weighted sum (0 when no row was processed)."""
        return self.frecency
|
|
|
|
|
|
class pearson:

    """
    .. function:: pearson(X,Y) -> float

    Computes the Pearson correlation coefficient of the X and Y datasets.
    Rows where X or Y cannot be converted to a fraction are skipped, and 0 is
    returned when the denominator of the coefficient is zero.

    Examples:

    >>> sql("select pearson(c1,1/c1) from range(1,91)")
    pearson(c1,1/c1)
    ----------------
    -0.181568259801

    >>> sql("select pearson(c1,17*c1+5) from range(1,91)")
    pearson(c1,17*c1+5)
    -------------------
    1.0

    >>> sql("select pearson(c1,pyfun('math.pow',2,c1)) from range(1,41)")
    pearson(c1,pyfun('math.pow',2,c1))
    ----------------------------------
    0.456349821382

    >>> sql("select pearson(a,b) from (select 1 as a, 2 as b)")
    pearson(a,b)
    ------------
    0
    """

    registered = True  # Value to define db operator
    sum_x = 0
    sum_y = 0

    def __init__(self):
        # Exact running sums: x, x^2, y, y^2 and x*y, plus the row count.
        self.sX = Fraction(0)
        self.sX2 = Fraction(0)
        self.sY = Fraction(0)
        self.sY2 = Fraction(0)
        self.sXY = Fraction(0)
        self.n = 0

    def step(self, *args):
        """Fold one (x, y) pair into the running sums."""
        try:
            first, second = [Fraction(item) for item in args[:2]]
        except KeyboardInterrupt:
            raise
        except:
            return

        self.n = self.n + 1
        self.sX = self.sX + first
        self.sY = self.sY + second
        self.sX2 = self.sX2 + first * first
        self.sY2 = self.sY2 + second * second
        self.sXY = self.sXY + first * second

    def final(self):
        """Return the coefficient, 0 for a zero denominator, None for no rows."""
        if self.n == 0:
            return None

        spread_x = math.sqrt(self.n * self.sX2 - self.sX * self.sX)
        spread_y = math.sqrt(self.n * self.sY2 - self.sY * self.sY)
        denominator = spread_x * spread_y

        if denominator == 0:
            return 0

        numerator = self.n * self.sXY - self.sX * self.sY
        return float(numerator / denominator)
|
|
|
|
|
|
class fsum:
    """
    .. function:: fsum(X) -> json

    Computes the sum using fractional computation. It returns the result in json format,
    as a *[numerator, denominator]* pair.
    Numeric inputs are added exactly; textual inputs are added only when they hold a
    json *[numerator, denominator]* pair. Anything else is ignored.

    Examples:

    >>> table1('''
    ... 1
    ... 2
    ... 2
    ... 10
    ... ''')

    >>> sql("select fsum(a) from table1")
    fsum(a)
    -------
    [15, 1]

    >>> table1('''
    ... 0.99999999
    ... 3.99999999
    ... 0.78978989
    ... 1.99999999
    ... ''')

    >>> sql("select fsum(a) from table1")
    fsum(a)
    -------------------------------------
    [70164189421580937, 9007199254740992]
    """

    registered = True

    # Plain numeric types; ``long`` exists only on Python 2.
    try:
        _number_types = (int, float, long)
    except NameError:
        _number_types = (int, float)

    # Errors that mean "this row holds nothing summable" and cause a skip.
    _skip_errors = (TypeError, ValueError, IndexError, KeyError,
                    ZeroDivisionError, OverflowError)

    def __init__(self):
        self.init = True
        self.x = Fraction(0)

    def step(self, *args):
        """Add one value (a number or a json [numerator, denominator]) to the sum."""
        if self.init:
            self.init = False
            if not args:
                raise functions.OperatorError("fsum", "No arguments")

        value = args[0]
        try:
            if isinstance(value, self._number_types):
                x = Fraction(value)
            else:
                # Expected to be a json pair, e.g. the output of another fsum.
                json_object = json.loads(value)
                x = Fraction(json_object[0], json_object[1])
        except self._skip_errors:
            # Undecodable text, wrong json shape, NULL, nan/inf, zero denominator.
            return

        self.x += x

    def final(self):
        """Return the exact sum as the json text '[numerator, denominator]'."""
        return json.dumps([self.x.numerator, self.x.denominator])
|
|
|
|
|
|
|
|
|
|
|
|
if '.' not in __name__:
    # This is needed to be able to test the function; put it at the end of
    # every new function you create.
    import sys
    import setpath
    from functions import *
    testfunction()
    if __name__ == "__main__":
        # Switch the interpreter's default string encoding to UTF-8 before
        # the doctests run.
        reload(sys)
        sys.setdefaultencoding('utf-8')
        import doctest
        doctest.testmod()
|