diff --git a/pkg/timedata/lttb.go b/pkg/timedata/lttb.go
new file mode 100644
index 0000000..de74fab
--- /dev/null
+++ b/pkg/timedata/lttb.go
@@ -0,0 +1,138 @@
+package timedata
+
+import (
+	"math"
+)
+
+// Copied from https://github.com/haoel/downsampling-algorithm
+
+// LTTB downsamples data with the Largest Triangle Three Buckets algorithm.
+// The first and last points are always kept. Every interior bucket contributes
+// the point that forms the largest triangle with the previously selected point
+// and the average of the points that follow the bucket.
+//   - data: the original data
+//   - threshold: number of data points to be returned
+//
+// data is returned as is when threshold is not positive or not smaller than
+// len(data). A threshold of 1 or 2 yields only the first and the last point.
+func LTTB(data []Point, threshold int) []Point {
+	if threshold <= 0 || threshold >= len(data) {
+		return data // Nothing to do
+	}
+	if threshold < 3 {
+		// No interior buckets, and threshold-2 must not be used as a divisor.
+		return []Point{data[0], data[len(data)-1]}
+	}
+
+	sampledData := make([]Point, 0, threshold)
+	sampledData = append(sampledData, data[0]) // Always add the first point
+
+	// Bucket size. Leave room for start and end data points
+	bucketSize := float64(len(data)-2) / float64(threshold-2)
+
+	// Three indices describe the window of each iteration:
+	//   bucketLow    - first index of the current bucket
+	//   bucketMiddle - index after the current bucket, where the next one starts
+	//   bucketHigh   - last index of the range averaged for the next bucket
+	bucketLow := 1
+	bucketMiddle := int(math.Floor(bucketSize)) + 1
+
+	prevMaxAreaPoint := 0 // index of the previously selected point (point a)
+
+	for i := 0; i < threshold-2; i++ {
+		bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1
+
+		// For the final bucket this range would run past the end of data,
+		// so stop at the last point.
+		avgEnd := bucketHigh + 1
+		if avgEnd > len(data) {
+			avgEnd = len(data)
+		}
+
+		// Calculate point average for next bucket (containing c)
+		avgPoint := calculateAverageDataPoint(data[bucketMiddle:avgEnd])
+
+		pointA := data[prevMaxAreaPoint]
+
+		// Default to the bucket's first point so that a point of this bucket
+		// is picked even when no area compares greater (NaN coordinates).
+		maxArea := -1.0
+		maxAreaPoint := bucketLow
+		for j := bucketLow; j < bucketMiddle; j++ {
+			area := calculateTriangleArea(pointA, avgPoint, data[j])
+			if area > maxArea {
+				maxArea = area
+				maxAreaPoint = j
+			}
+		}
+
+		// Pick this point from the bucket; it becomes point a of the next one
+		sampledData = append(sampledData, data[maxAreaPoint])
+		prevMaxAreaPoint = maxAreaPoint
+
+		// Move to the next window
+		bucketLow = bucketMiddle
+		bucketMiddle = bucketHigh
+	}
+
+	sampledData = append(sampledData, data[len(data)-1]) // Always add last
+
+	return sampledData
+}
+
+// LTTB2 is a bucket based variant of LTTB: data is split into threshold buckets
+// and one point is picked from each of them. Like LTTB it returns data as is
+// when threshold is not positive or not smaller than len(data), and only the
+// first and the last point for a threshold of 1 or 2.
+func LTTB2(data []Point, threshold int) []Point {
+	if threshold <= 0 || threshold >= len(data) {
+		return data
+	}
+	if threshold < 3 {
+		return []Point{data[0], data[len(data)-1]}
+	}
+	return LTTBForBuckets(splitDataBucket(data, threshold))
+}
+
+// LTTBForBuckets returns one point per bucket: the first point of the first
+// and of the last bucket, and for every bucket in between the point that forms
+// the largest triangle with the previously selected point and the average of
+// the following bucket. Empty buckets are skipped, a single bucket yields a
+// single point, and nil is returned when there is no first point to start from.
+func LTTBForBuckets(buckets [][]Point) []Point {
+	bucketCount := len(buckets)
+	if bucketCount == 0 || len(buckets[0]) == 0 {
+		return nil
+	}
+
+	lastSelectedDataPoint := buckets[0][0]
+	sampledData := make([]Point, 0, bucketCount)
+	sampledData = append(sampledData, lastSelectedDataPoint)
+
+	for i := 1; i < bucketCount-1; i++ {
+		bucket := buckets[i]
+		if len(bucket) == 0 {
+			continue
+		}
+		averagePoint := calculateAveragePoint(buckets[i+1])
+
+		maxArea := -1.0
+		maxAreaIndex := 0
+		for j, point := range bucket {
+			area := calculateTriangleArea(lastSelectedDataPoint, point, averagePoint)
+			if area > maxArea {
+				maxArea = area
+				maxAreaIndex = j
+			}
+		}
+		// Plain assignment: ":=" declared a loop-local copy, so every bucket
+		// was measured against the very first point.
+		lastSelectedDataPoint = bucket[maxAreaIndex]
+		sampledData = append(sampledData, lastSelectedDataPoint)
+	}
+
+	if last := buckets[bucketCount-1]; bucketCount > 1 && len(last) > 0 {
+		sampledData = append(sampledData, last[0])
+	}
+	return sampledData
+}
diff --git a/pkg/timedata/timedata.go b/pkg/timedata/timedata.go
index 369ae44..0a5719d 100644
--- a/pkg/timedata/timedata.go
+++ b/pkg/timedata/timedata.go
@@ -8,9 +8,43 @@ import (
 	"github.com/cointop-sh/cointop/pkg/humanize"
 )
 
+// Point is one sample of a series. ResampleTimeSeriesData stores the timestamp
+// of a [timestamp,value] pair in X and its value in Y.
+type Point struct {
+	X float64
+	Y float64
+}
+
 // ResampleTimeSeriesData resamples the given [timestamp,value] data to numsteps between start-end (returns numSteps+1 points).
 // If the data does not extend past start/end then there will likely be NaN in the output data.
+//
+// The above only describes upsampling (numSteps > len(data)), which is done by
+// linear interpolation. Otherwise the data is downsampled with LTTB, which
+// ignores start/end and only returns points that are present in data.
 func ResampleTimeSeriesData(data [][]float64, start float64, end float64, numSteps int) [][]float64 {
+	// Use linear interpolation for upsampling
+	if numSteps > len(data) {
+		return LinearInterpolateTimeSeriesData(data, start, end, numSteps)
+	}
+
+	// Use LTTB for downsampling
+	points := make([]Point, 0, len(data))
+	for _, item := range data {
+		points = append(points, Point{X: item[0], Y: item[1]})
+	}
+
+	resultPoints := LTTB(points, numSteps)
+
+	newData := make([][]float64, 0, len(resultPoints))
+	for _, item := range resultPoints {
+		newData = append(newData, []float64{item.X, item.Y})
+	}
+	return newData
+}
+
+// LinearInterpolateTimeSeriesData is the resampling that ResampleTimeSeriesData
+// performed before LTTB downsampling was added; see its comment for the contract.
+func LinearInterpolateTimeSeriesData(data [][]float64, start float64, end float64, numSteps int) [][]float64 {
 	var newData [][]float64
 	l := len(data)
 	step := (end - start) / float64(numSteps)
diff --git a/pkg/timedata/utils.go b/pkg/timedata/utils.go
new file mode 100644
index 0000000..7456815
--- /dev/null
+++ b/pkg/timedata/utils.go
@@ -0,0 +1,84 @@
+package timedata
+
+import (
+	"math"
+)
+
+// Copied from https://github.com/haoel/downsampling-algorithm
+
+// calculateTriangleArea returns the unsigned area of the triangle pa, pb, pc.
+func calculateTriangleArea(pa, pb, pc Point) float64 {
+	area := ((pa.X-pc.X)*(pb.Y-pa.Y) - (pa.X-pb.X)*(pc.Y-pa.Y)) * 0.5
+	return math.Abs(area)
+}
+
+// calculateAverageDataPoint returns the centroid of points. Both coordinates
+// are NaN for an empty slice (0/0), so callers must pass at least one point.
+func calculateAverageDataPoint(points []Point) (avg Point) {
+	for _, point := range points {
+		avg.X += point.X
+		avg.Y += point.Y
+	}
+	n := float64(len(points))
+	return Point{X: avg.X / n, Y: avg.Y / n}
+}
+
+// splitDataBucket partitions data into threshold buckets: one for the first
+// point, one for the last and threshold-2 contiguous, non-overlapping ones for
+// the points in between. A threshold above len(data) is clamped to len(data).
+func splitDataBucket(data []Point, threshold int) [][]Point {
+	if threshold > len(data) {
+		threshold = len(data) // more buckets than points would leave some empty
+	}
+	if threshold < 1 {
+		return nil // nothing to split
+	}
+	// First and last bucket are formed by the first and the last data points
+	buckets := make([][]Point, threshold)
+	buckets[0] = append(buckets[0], data[0])
+	buckets[threshold-1] = append(buckets[threshold-1], data[len(data)-1])
+	if threshold < 3 {
+		return buckets // no interior buckets to fill
+	}
+	d := data[1 : len(data)-1] // interior points only
+	bucketSize := float64(len(d)) / float64(threshold-2)
+	for i := 0; i < threshold-2; i++ {
+		bucketStartIdx := int(math.Floor(float64(i) * bucketSize))
+		// Exclusive end; the former "+ 1" put boundary points in two buckets.
+		bucketEndIdx := int(math.Floor(float64(i+1) * bucketSize))
+		if i == threshold-3 {
+			bucketEndIdx = len(d) // absorb floating point rounding
+		}
+		buckets[i+1] = append(buckets[i+1], d[bucketStartIdx:bucketEndIdx]...)
+	}
+	return buckets
+}
+
+// calculateAveragePoint returns the centroid of points (zero Point if empty).
+func calculateAveragePoint(points []Point) Point {
+	var p Point
+	for _, pt := range points {
+		p.X += pt.X
+		p.Y += pt.Y
+	}
+	if l := float64(len(points)); l > 0 {
+		p.X, p.Y = p.X/l, p.Y/l
+	}
+	return p
+}
+
+// peakAndTroughPointIndex returns the index of the point with the largest Y
+// and the index of the point with the smallest Y; (0, 0) for an empty slice.
+func peakAndTroughPointIndex(points []Point) (int, int) {
+	maxY, minY := math.Inf(-1), math.Inf(1)
+	maxIdx, minIdx := 0, 0
+	for i, pt := range points {
+		if pt.Y > maxY {
+			maxY, maxIdx = pt.Y, i
+		}
+		if pt.Y < minY {
+			minY, minIdx = pt.Y, i
+		}
+	}
+	return maxIdx, minIdx
+}