new operator 'groupby'
This commit is contained in:
parent
d5ced343c4
commit
3b2ef7927b
@ -143,13 +143,12 @@ func (dict *DictType) HasKey(target any) (ok bool) {
|
||||
return
|
||||
}
|
||||
|
||||
func (dict *DictType) SetItem(key any, value any) (err error) {
|
||||
func (dict *DictType) SetItem(key any, value any) {
|
||||
(*dict)[key] = value
|
||||
return
|
||||
}
|
||||
|
||||
func (dict *DictType) GetItem(key any) (value any, err error) {
|
||||
value = (*dict)[key]
|
||||
func (dict *DictType) GetItem(key any) (value any, exists bool) {
|
||||
value, exists = (*dict)[key]
|
||||
return
|
||||
}
|
||||
|
||||
@ -169,6 +168,24 @@ func (dict *DictType) Merge(second *DictType) {
|
||||
}
|
||||
}
|
||||
|
||||
func (dict *DictType) Equals(dict2 DictType) (answer bool) {
|
||||
if dict2 != nil && len(*dict) == len(dict2) {
|
||||
answer = true
|
||||
for key, value1 := range *dict {
|
||||
if value2, exists := dict2.GetItem(key); exists {
|
||||
if !Equal(value1, value2) {
|
||||
answer = false
|
||||
break
|
||||
}
|
||||
} else {
|
||||
answer = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
////////////////
|
||||
|
||||
type DictFormat interface {
|
||||
|
||||
@ -220,11 +220,12 @@ func anyToFract(v any) (f *FractionType, err error) {
|
||||
if f, ok = v.(*FractionType); !ok {
|
||||
if n, ok := v.(int64); ok {
|
||||
f = intToFraction(n)
|
||||
} else if dec, ok := v.(float64); ok {
|
||||
f, err = Float64ToFraction(dec)
|
||||
} else {
|
||||
err = ErrExpectedGot("fract", TypeFraction, v)
|
||||
}
|
||||
}
|
||||
if f == nil {
|
||||
err = ErrExpectedGot("fract", TypeFraction, v)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@ -273,14 +274,16 @@ func CmpAnyFract(af1, af2 any) (result int, err error) {
|
||||
// =0 if af1 == af2
|
||||
// >0 if af1 > af2
|
||||
func cmpFract(f1, f2 *FractionType) (result int) {
|
||||
f2.num = -f2.num
|
||||
f := SumFract(f1, f2)
|
||||
if f.num < 0 {
|
||||
result = -1
|
||||
} else if f.num > 0 {
|
||||
result = 1
|
||||
} else {
|
||||
result = 0
|
||||
if f1 != nil && f2 != nil {
|
||||
f2.num = -f2.num
|
||||
f := SumFract(f1, f2)
|
||||
if f.num < 0 {
|
||||
result = -1
|
||||
} else if f.num > 0 {
|
||||
result = 1
|
||||
} else {
|
||||
result = 0
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@ -50,13 +50,13 @@ func ListFromStrings(stringList []string) (list *ListType) {
|
||||
return
|
||||
}
|
||||
|
||||
func (ls *ListType) ToString(opt FmtOpt) (s string) {
|
||||
func (dict *ListType) ToString(opt FmtOpt) (s string) {
|
||||
indent := GetFormatIndent(opt)
|
||||
flags := GetFormatFlags(opt)
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteByte('[')
|
||||
if len(*ls) > 0 {
|
||||
if len(*dict) > 0 {
|
||||
innerOpt := MakeFormatOptions(flags, indent+1)
|
||||
nest := strings.Repeat(" ", indent+1)
|
||||
|
||||
@ -64,7 +64,7 @@ func (ls *ListType) ToString(opt FmtOpt) (s string) {
|
||||
sb.WriteByte('\n')
|
||||
sb.WriteString(nest)
|
||||
}
|
||||
for i, item := range []any(*ls) {
|
||||
for i, item := range []any(*dict) {
|
||||
if i > 0 {
|
||||
if flags&MultiLine != 0 {
|
||||
sb.WriteString(",\n")
|
||||
@ -96,19 +96,19 @@ func (ls *ListType) ToString(opt FmtOpt) (s string) {
|
||||
return
|
||||
}
|
||||
|
||||
func (ls *ListType) String() string {
|
||||
return ls.ToString(0)
|
||||
func (dict *ListType) String() string {
|
||||
return dict.ToString(0)
|
||||
}
|
||||
|
||||
func (ls *ListType) TypeName() string {
|
||||
func (dict *ListType) TypeName() string {
|
||||
return "list"
|
||||
}
|
||||
|
||||
func (ls *ListType) Contains(t *ListType) (answer bool) {
|
||||
if len(*ls) >= len(*t) {
|
||||
func (dict *ListType) Contains(t *ListType) (answer bool) {
|
||||
if len(*dict) >= len(*t) {
|
||||
answer = true
|
||||
for _, item := range *t {
|
||||
if answer = ls.IndexDeepSameCmp(item) >= 0; !answer {
|
||||
if answer = dict.IndexDeepSameCmp(item) >= 0; !answer {
|
||||
break
|
||||
}
|
||||
}
|
||||
@ -120,8 +120,11 @@ func (ls1 *ListType) Equals(ls2 ListType) (answer bool) {
|
||||
if ls2 != nil && len(*ls1) == len(ls2) {
|
||||
answer = true
|
||||
for index, i1 := range *ls1 {
|
||||
// if i1 != (ls2)[index] {
|
||||
if !reflect.DeepEqual(i1, ls2[index]) {
|
||||
// if !reflect.DeepEqual(i1, ls2[index]) {
|
||||
// answer = false
|
||||
// break
|
||||
// }
|
||||
if !Equal(i1, ls2[index]) {
|
||||
answer = false
|
||||
break
|
||||
}
|
||||
@ -130,11 +133,11 @@ func (ls1 *ListType) Equals(ls2 ListType) (answer bool) {
|
||||
return
|
||||
}
|
||||
|
||||
func (list *ListType) IndexDeepSameCmp(target any) (index int) {
|
||||
func (dict *ListType) IndexDeepSameCmp(target any) (index int) {
|
||||
var eq bool
|
||||
var err error
|
||||
index = -1
|
||||
for i, item := range *list {
|
||||
for i, item := range *dict {
|
||||
if eq, err = deepSame(item, target, SameContent); err != nil {
|
||||
break
|
||||
} else if eq {
|
||||
@ -185,15 +188,15 @@ func deepSame(a, b any, deepCmp DeepFuncTemplate) (eq bool, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func (list *ListType) SetItem(index int64, value any) (err error) {
|
||||
if index >= 0 && index < int64(len(*list)) {
|
||||
(*list)[index] = value
|
||||
func (dict *ListType) SetItem(index int64, value any) (err error) {
|
||||
if index >= 0 && index < int64(len(*dict)) {
|
||||
(*dict)[index] = value
|
||||
} else {
|
||||
err = fmt.Errorf("index %d out of bounds (0, %d)", index, len(*list)-1)
|
||||
err = fmt.Errorf("index %d out of bounds (0, %d)", index, len(*dict)-1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (list *ListType) AppendItem(value any) {
|
||||
*list = append(*list, value)
|
||||
func (dict *ListType) AppendItem(value any) {
|
||||
*dict = append(*dict, value)
|
||||
}
|
||||
|
||||
@ -49,7 +49,7 @@ func assignCollectionItem(ctx kern.ExprContext, collectionTerm, keyListTerm *ter
|
||||
err = keyListTerm.Errorf("integer expected, got %v [%s]", keyValue, kern.TypeName(keyValue))
|
||||
}
|
||||
case *kern.DictType:
|
||||
err = collection.SetItem(keyValue, value)
|
||||
collection.SetItem(keyValue, value)
|
||||
default:
|
||||
err = collectionTerm.Errorf("collection expected")
|
||||
}
|
||||
|
||||
@ -45,15 +45,20 @@ func evalDot(ctx kern.ExprContext, opTerm *term) (v any, err error) {
|
||||
err = indexTerm.tk.ErrorExpectedGot("identifier")
|
||||
}
|
||||
case *kern.DictType:
|
||||
var ok bool
|
||||
s := opTerm.children[1].symbol()
|
||||
if s == SymVariable || s == SymString {
|
||||
src := opTerm.children[1].Source()
|
||||
if len(src) > 1 && src[0] == '"' && src[len(src)-1] == '"' {
|
||||
src = src[1 : len(src)-1]
|
||||
}
|
||||
v, err = unboxedValue.GetItem(src)
|
||||
if v, ok = unboxedValue.GetItem(src); !ok {
|
||||
err = opTerm.errKeyNotFound(src)
|
||||
}
|
||||
} else if rightValue, err = opTerm.children[1].Compute(ctx); err == nil {
|
||||
v, err = unboxedValue.GetItem(rightValue)
|
||||
if v, ok = unboxedValue.GetItem(rightValue); !ok {
|
||||
err = opTerm.errKeyNotFound(rightValue)
|
||||
}
|
||||
}
|
||||
default:
|
||||
if rightValue, err = opTerm.children[1].Compute(ctx); err == nil {
|
||||
|
||||
105
operator-groupby.go
Normal file
105
operator-groupby.go
Normal file
@ -0,0 +1,105 @@
|
||||
// Copyright (c) 2024-2026 Celestino Amoroso (celestino.amoroso@gmail.com).
|
||||
// All rights reserved.
|
||||
|
||||
// operator-groupby.go
|
||||
package expr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"git.portale-stac.it/go-pkg/expr/kern"
|
||||
)
|
||||
|
||||
//-------- group by term
|
||||
|
||||
func newGroupByTerm(tk *Token) (inst *term) {
|
||||
return &term{
|
||||
tk: *tk,
|
||||
children: make([]*term, 0, 2),
|
||||
position: posInfix,
|
||||
priority: priIterOp,
|
||||
evalFunc: evalGroupBy,
|
||||
}
|
||||
}
|
||||
|
||||
func evalGroupBy(ctx kern.ExprContext, opTerm *term) (v any, err error) {
|
||||
var leftValue, rightValue any
|
||||
var it kern.Iterator
|
||||
var item any
|
||||
var sKey string
|
||||
var keyByIndex bool
|
||||
|
||||
if err = opTerm.checkOperands(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if leftValue, err = opTerm.children[0].Compute(ctx); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if it, err = NewIterator(leftValue); err != nil {
|
||||
return nil, fmt.Errorf("left operand of MAP must be an iterable data-source; got %s", kern.TypeName(leftValue))
|
||||
}
|
||||
|
||||
rightTk := opTerm.children[1].tk
|
||||
if rightTk.IsSymbol(SymVariable) && rightTk.source == "__" {
|
||||
keyByIndex = true
|
||||
} else if rightValue, err = opTerm.children[1].Compute(ctx); err != nil {
|
||||
return
|
||||
} else if kern.IsString(rightValue) {
|
||||
sKey = rightValue.(string)
|
||||
} else {
|
||||
return nil, fmt.Errorf("right operand of GROUPBY must be a string or identifier '__'; got %s", kern.TypeName(rightValue))
|
||||
}
|
||||
|
||||
values := kern.MakeDict()
|
||||
for item, err = it.Next(); err == nil; item, err = it.Next() {
|
||||
ctx.SetVar("_", item)
|
||||
ctx.SetVar("__", it.Index())
|
||||
ctx.SetVar("_#", it.Count())
|
||||
|
||||
var sItemKey string
|
||||
|
||||
if d, ok := item.(*kern.DictType); ok {
|
||||
if keyByIndex || len(sKey) == 0 {
|
||||
sItemKey = strconv.Itoa(int(it.Index()))
|
||||
} else if d.HasKey(sKey) {
|
||||
if keyValue, exists := d.GetItem(sKey); exists {
|
||||
sItemKey = fmt.Sprintf("%v", keyValue)
|
||||
} else {
|
||||
sItemKey = "_"
|
||||
}
|
||||
} else {
|
||||
sItemKey = "_"
|
||||
}
|
||||
} else {
|
||||
sItemKey = strconv.Itoa(int(it.Index()))
|
||||
}
|
||||
|
||||
var ls *kern.ListType
|
||||
if lsAny, exists := values.GetItem(sItemKey); exists && lsAny != nil {
|
||||
ls = lsAny.(*kern.ListType)
|
||||
}
|
||||
if ls == nil {
|
||||
ls = kern.NewListA()
|
||||
}
|
||||
ls.AppendItem(item)
|
||||
values.SetItem(sItemKey, ls)
|
||||
|
||||
ctx.DeleteVar("_#")
|
||||
ctx.DeleteVar("__")
|
||||
ctx.DeleteVar("_")
|
||||
}
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
}
|
||||
v = values
|
||||
return
|
||||
}
|
||||
|
||||
// init registers newGroupByTerm as the term constructor for the SymKwGroupBy symbol.
|
||||
func init() {
|
||||
registerTermConstructor(SymKwGroupBy, newGroupByTerm)
|
||||
}
|
||||
@ -139,6 +139,7 @@ func init() {
|
||||
SymKwFilter: {"filter", symClassOperator, posInfix},
|
||||
SymKwDigest: {"digest", symClassOperator, posInfix},
|
||||
SymKwJoin: {"join", symClassOperator, posInfix},
|
||||
SymKwGroupBy: {"groupby", symClassOperator, posInfix},
|
||||
SymKwFunc: {"func(", symClassDeclaration, posPrefix},
|
||||
SymKwBuiltin: {"builtin", symClassOperator, posPrefix},
|
||||
SymKwPlugin: {"plugin", symClassOperator, posPrefix},
|
||||
|
||||
@ -122,6 +122,7 @@ const (
|
||||
SymKwMap
|
||||
SymKwFilter
|
||||
SymKwDigest
|
||||
SymKwGroupBy
|
||||
SymKwJoin
|
||||
SymKwNil
|
||||
SymKwUnset
|
||||
@ -147,5 +148,6 @@ func init() {
|
||||
"UNSET": SymKwUnset,
|
||||
"DIGEST": SymKwDigest,
|
||||
"JOIN": SymKwJoin,
|
||||
"GROUPBY": SymKwGroupBy,
|
||||
}
|
||||
}
|
||||
|
||||
@ -6,7 +6,6 @@ package expr
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@ -74,7 +73,6 @@ func doTest(t *testing.T, ctx kern.ExprContext, section string, input *inputType
|
||||
var ast Expr
|
||||
var gotResult any
|
||||
var gotErr error
|
||||
var eq, eqDone bool
|
||||
|
||||
wantErr := getWantedError(input)
|
||||
|
||||
@ -93,18 +91,24 @@ func doTest(t *testing.T, ctx kern.ExprContext, section string, input *inputType
|
||||
gotResult, gotErr = ast.Eval(ctx)
|
||||
}
|
||||
|
||||
if input.wantResult != nil && gotResult != nil {
|
||||
if ls1, ok := input.wantResult.(*kern.ListType); ok {
|
||||
if ls2, ok := gotResult.(*kern.ListType); ok {
|
||||
eq = ls1.Equals(*ls2)
|
||||
eqDone = true
|
||||
}
|
||||
}
|
||||
}
|
||||
eq := kern.Equal(gotResult, input.wantResult)
|
||||
// if input.wantResult != nil && gotResult != nil {
|
||||
// if ls1, ok := input.wantResult.(*kern.ListType); ok {
|
||||
// if ls2, ok := gotResult.(*kern.ListType); ok {
|
||||
// eq = ls1.Equals(*ls2)
|
||||
// eqDone = true
|
||||
// }
|
||||
// } else if dict1, ok := input.wantResult.(*kern.DictType); ok {
|
||||
// if dict2, ok := gotResult.(*kern.DictType); ok {
|
||||
// eq = dict1.Equals(*dict2)
|
||||
// eqDone = true
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
if !eqDone {
|
||||
eq = reflect.DeepEqual(gotResult, input.wantResult)
|
||||
}
|
||||
// if !eqDone {
|
||||
// eq = reflect.DeepEqual(gotResult, input.wantResult)
|
||||
// }
|
||||
|
||||
if !eq /*gotResult != input.wantResult*/ {
|
||||
t.Errorf(">>>%s/%d: `%s` -> result = %v [%s], want = %v [%s]", section, count, input.source, gotResult, kern.TypeName(gotResult), input.wantResult, kern.TypeName(input.wantResult))
|
||||
|
||||
@ -38,8 +38,9 @@ func TestFractionsParser(t *testing.T) {
|
||||
/* 23 */ {`1+1:2+0.5`, float64(2), nil},
|
||||
/* 24 */ {`1:(2-2)`, nil, `[1:3] division by zero`},
|
||||
/* 25 */ {`[0,1][1-1]:1`, kern.NewFraction(0, 1), nil},
|
||||
/* 26 */ {`1:2 == 0.5`, true, nil},
|
||||
}
|
||||
// runTestSuiteSpec(t, section, inputs, 25)
|
||||
// runTestSuiteSpec(t, section, inputs, 26)
|
||||
runTestSuite(t, section, inputs)
|
||||
}
|
||||
|
||||
|
||||
@ -35,16 +35,84 @@ func TestOperator(t *testing.T) {
|
||||
/* 20 */ {`a=1; a^=2`, int64(3), nil},
|
||||
/* 21 */ {`a=1; ++a`, int64(2), nil},
|
||||
/* 22 */ {`a=1; --a`, int64(0), nil},
|
||||
/* 23 */ {`[1,2,3] map var("_")`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil},
|
||||
/* 24 */ {`[1,2,3] map $_`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil},
|
||||
/* 25 */ {`[1,2,3,4] filter ($_ % 2 == 0)`, kern.NewList([]any{int64(2), int64(4)}), nil},
|
||||
/* 26 */ {`max=0; [2,3,1] digest max=(($_ > max) ? {$_} :: {max})`, int64(3), nil},
|
||||
/* 27 */ {`["a","b"] join ["x"]`, kern.NewList([]any{"a", "b", "x"}), nil},
|
||||
/* 28 */ {`["a","b"] join ["x"-true]`, nil, `[1:21] left operand 'x' [string] and right operand 'true' [bool] are not compatible with operator "-"`},
|
||||
}
|
||||
|
||||
// t.Setenv("EXPR_PATH", ".")
|
||||
|
||||
// runTestSuiteSpec(t, section, inputs, 28)
|
||||
// runTestSuiteSpec(t, section, inputs, 22)
|
||||
runTestSuite(t, section, inputs)
|
||||
}
|
||||
|
||||
func TestOperatorMap(t *testing.T) {
|
||||
section := "Operator-Map"
|
||||
inputs := []inputType{
|
||||
/* 1 */ {`a=1; --a`, int64(0), nil},
|
||||
/* 2 */ {`[1,2,3] map var("_")`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil},
|
||||
/* 3 */ {`[1,2,3] map $_`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil},
|
||||
}
|
||||
|
||||
// runTestSuiteSpec(t, section, inputs, 3)
|
||||
runTestSuite(t, section, inputs)
|
||||
}
|
||||
|
||||
func TestOperatorFilter(t *testing.T) {
|
||||
section := "Operator-Filter"
|
||||
inputs := []inputType{
|
||||
/* 1 */ {`[1,2,3,4] filter ($_ % 2 == 0)`, kern.NewList([]any{int64(2), int64(4)}), nil},
|
||||
}
|
||||
|
||||
// runTestSuiteSpec(t, section, inputs, 1)
|
||||
runTestSuite(t, section, inputs)
|
||||
}
|
||||
|
||||
func TestOperatorDigest(t *testing.T) {
|
||||
section := "Operator-Digest"
|
||||
inputs := []inputType{
|
||||
/* 1 */ {`max=0; [2,3,1] digest max=(($_ > max) ? {$_} :: {max})`, int64(3), nil},
|
||||
}
|
||||
|
||||
// runTestSuiteSpec(t, section, inputs, 29)
|
||||
runTestSuite(t, section, inputs)
|
||||
}
|
||||
|
||||
func TestOperatorJoin(t *testing.T) {
|
||||
section := "Operator-Join"
|
||||
inputs := []inputType{
|
||||
/* 1 */ {`["a","b"] join ["x"]`, kern.NewList([]any{"a", "b", "x"}), nil},
|
||||
/* 2 */ {`["a","b"] join ["x"-true]`, nil, `[1:21] left operand 'x' [string] and right operand 'true' [bool] are not compatible with operator "-"`},
|
||||
}
|
||||
|
||||
// runTestSuiteSpec(t, section, inputs, 2)
|
||||
runTestSuite(t, section, inputs)
|
||||
}
|
||||
|
||||
func TestOperatorGroupBy(t *testing.T) {
|
||||
section := "Operator-GroupBy"
|
||||
inputs := []inputType{
|
||||
/* 1 */ {`L=[{"num": 1, "alpha": "one"}, {"num": 2, "alpha": "two"}, {"num": 3, "alpha": "three"}]; L groupby "num"`,
|
||||
kern.NewDict(map[any]any{
|
||||
"1": kern.NewListA(kern.NewDict(map[any]any{"num": int64(1), "alpha": "one"})),
|
||||
"2": kern.NewListA(kern.NewDict(map[any]any{"num": int64(2), "alpha": "two"})),
|
||||
"3": kern.NewListA(kern.NewDict(map[any]any{"num": int64(3), "alpha": "three"})),
|
||||
}),
|
||||
nil},
|
||||
/* 2 */ {`cars = [{"model": "compas", "vendor": "jeep"}, {"model": "limited", "vendor": "jeep"}, {"model": "600", "vendor":"fiat"}]; cars groupby "vendor"`,
|
||||
kern.NewDict(map[any]any{
|
||||
"jeep": kern.NewListA(
|
||||
kern.NewDict(map[any]any{"model": "compas", "vendor": "jeep"}),
|
||||
kern.NewDict(map[any]any{"model": "limited", "vendor": "jeep"})),
|
||||
"fiat": kern.NewListA(kern.NewDict(map[any]any{"model": "600", "vendor": "fiat"})),
|
||||
}),
|
||||
nil},
|
||||
/* 3 */ {`[3,4,5] groupby $__`,
|
||||
kern.NewDict(map[any]any{
|
||||
"0": kern.NewListA(int64(3)),
|
||||
"1": kern.NewListA(int64(4)),
|
||||
"2": kern.NewListA(int64(5)),
|
||||
}),
|
||||
nil},
|
||||
}
|
||||
|
||||
runTestSuiteSpec(t, section, inputs, 3)
|
||||
// runTestSuite(t, section, inputs)
|
||||
}
|
||||
|
||||
4
term.go
4
term.go
@ -235,6 +235,10 @@ func (t *term) errDivisionByZero() error {
|
||||
return t.tk.Errorf("division by zero")
|
||||
}
|
||||
|
||||
// errKeyNotFound returns the error, created through the term's token, that
// reports key (formatted with %v) as not found.
func (t *term) errKeyNotFound(key any) error {
|
||||
return t.tk.Errorf("key '%v' not found", key)
|
||||
}
|
||||
|
||||
func (t *term) Errorf(template string, args ...any) (err error) {
|
||||
err = t.tk.Errorf(template, args...)
|
||||
return
|
||||
|
||||
Loading…
Reference in New Issue
Block a user