From 3b2ef7927b2fb72990810a748286f36d4b909716 Mon Sep 17 00:00:00 2001 From: Celestino Amoroso Date: Sat, 2 May 2026 14:46:28 +0200 Subject: [PATCH] new operator 'groupby' --- kern/dict-type.go | 25 ++++++++-- kern/fraction-type.go | 25 +++++----- kern/list-type.go | 41 +++++++++-------- operator-assign.go | 2 +- operator-dot.go | 9 +++- operator-groupby.go | 105 ++++++++++++++++++++++++++++++++++++++++++ symbol-map.go | 1 + symbol.go | 2 + t_common_test.go | 30 ++++++------ t_fractions_test.go | 3 +- t_operator_test.go | 82 ++++++++++++++++++++++++++++++--- term.go | 4 ++ 12 files changed, 271 insertions(+), 58 deletions(-) create mode 100644 operator-groupby.go diff --git a/kern/dict-type.go b/kern/dict-type.go index 6950fb5..3f16aab 100644 --- a/kern/dict-type.go +++ b/kern/dict-type.go @@ -143,13 +143,12 @@ func (dict *DictType) HasKey(target any) (ok bool) { return } -func (dict *DictType) SetItem(key any, value any) (err error) { +func (dict *DictType) SetItem(key any, value any) { (*dict)[key] = value - return } -func (dict *DictType) GetItem(key any) (value any, err error) { - value = (*dict)[key] +func (dict *DictType) GetItem(key any) (value any, exists bool) { + value, exists = (*dict)[key] return } @@ -169,6 +168,24 @@ func (dict *DictType) Merge(second *DictType) { } } +func (dict *DictType) Equals(dict2 DictType) (answer bool) { + if dict2 != nil && len(*dict) == len(dict2) { + answer = true + for key, value1 := range *dict { + if value2, exists := dict2.GetItem(key); exists { + if !Equal(value1, value2) { + answer = false + break + } + } else { + answer = false + break + } + } + } + return +} + //////////////// type DictFormat interface { diff --git a/kern/fraction-type.go b/kern/fraction-type.go index 63ec354..0d9ad7f 100644 --- a/kern/fraction-type.go +++ b/kern/fraction-type.go @@ -220,11 +220,12 @@ func anyToFract(v any) (f *FractionType, err error) { if f, ok = v.(*FractionType); !ok { if n, ok := v.(int64); ok { f = intToFraction(n) + } else if 
dec, ok := v.(float64); ok { + f, err = Float64ToFraction(dec) + } else { + err = ErrExpectedGot("fract", TypeFraction, v) } } - if f == nil { - err = ErrExpectedGot("fract", TypeFraction, v) - } return } @@ -273,14 +274,16 @@ func CmpAnyFract(af1, af2 any) (result int, err error) { // =0 if af1 == af2 // >0 if af1 > af2 func cmpFract(f1, f2 *FractionType) (result int) { - f2.num = -f2.num - f := SumFract(f1, f2) - if f.num < 0 { - result = -1 - } else if f.num > 0 { - result = 1 - } else { - result = 0 + if f1 != nil && f2 != nil { + f2.num = -f2.num + f := SumFract(f1, f2) + if f.num < 0 { + result = -1 + } else if f.num > 0 { + result = 1 + } else { + result = 0 + } } return } diff --git a/kern/list-type.go b/kern/list-type.go index bf23fd3..e81bbea 100644 --- a/kern/list-type.go +++ b/kern/list-type.go @@ -50,13 +50,13 @@ func ListFromStrings(stringList []string) (list *ListType) { return } -func (ls *ListType) ToString(opt FmtOpt) (s string) { +func (dict *ListType) ToString(opt FmtOpt) (s string) { indent := GetFormatIndent(opt) flags := GetFormatFlags(opt) var sb strings.Builder sb.WriteByte('[') - if len(*ls) > 0 { + if len(*dict) > 0 { innerOpt := MakeFormatOptions(flags, indent+1) nest := strings.Repeat(" ", indent+1) @@ -64,7 +64,7 @@ func (ls *ListType) ToString(opt FmtOpt) (s string) { sb.WriteByte('\n') sb.WriteString(nest) } - for i, item := range []any(*ls) { + for i, item := range []any(*dict) { if i > 0 { if flags&MultiLine != 0 { sb.WriteString(",\n") @@ -96,19 +96,19 @@ func (ls *ListType) ToString(opt FmtOpt) (s string) { return } -func (ls *ListType) String() string { - return ls.ToString(0) +func (dict *ListType) String() string { + return dict.ToString(0) } -func (ls *ListType) TypeName() string { +func (dict *ListType) TypeName() string { return "list" } -func (ls *ListType) Contains(t *ListType) (answer bool) { - if len(*ls) >= len(*t) { +func (dict *ListType) Contains(t *ListType) (answer bool) { + if len(*dict) >= len(*t) { answer = true 
for _, item := range *t { - if answer = ls.IndexDeepSameCmp(item) >= 0; !answer { + if answer = dict.IndexDeepSameCmp(item) >= 0; !answer { break } } @@ -120,8 +120,11 @@ func (ls1 *ListType) Equals(ls2 ListType) (answer bool) { if ls2 != nil && len(*ls1) == len(ls2) { answer = true for index, i1 := range *ls1 { - // if i1 != (ls2)[index] { - if !reflect.DeepEqual(i1, ls2[index]) { + // if !reflect.DeepEqual(i1, ls2[index]) { + // answer = false + // break + // } + if !Equal(i1, ls2[index]) { answer = false break } @@ -130,11 +133,11 @@ func (ls1 *ListType) Equals(ls2 ListType) (answer bool) { return } -func (list *ListType) IndexDeepSameCmp(target any) (index int) { +func (dict *ListType) IndexDeepSameCmp(target any) (index int) { var eq bool var err error index = -1 - for i, item := range *list { + for i, item := range *dict { if eq, err = deepSame(item, target, SameContent); err != nil { break } else if eq { @@ -185,15 +188,15 @@ func deepSame(a, b any, deepCmp DeepFuncTemplate) (eq bool, err error) { return } -func (list *ListType) SetItem(index int64, value any) (err error) { - if index >= 0 && index < int64(len(*list)) { - (*list)[index] = value +func (dict *ListType) SetItem(index int64, value any) (err error) { + if index >= 0 && index < int64(len(*dict)) { + (*dict)[index] = value } else { - err = fmt.Errorf("index %d out of bounds (0, %d)", index, len(*list)-1) + err = fmt.Errorf("index %d out of bounds (0, %d)", index, len(*dict)-1) } return } -func (list *ListType) AppendItem(value any) { - *list = append(*list, value) +func (dict *ListType) AppendItem(value any) { + *dict = append(*dict, value) } diff --git a/operator-assign.go b/operator-assign.go index 492cbb7..445eb02 100644 --- a/operator-assign.go +++ b/operator-assign.go @@ -49,7 +49,7 @@ func assignCollectionItem(ctx kern.ExprContext, collectionTerm, keyListTerm *ter err = keyListTerm.Errorf("integer expected, got %v [%s]", keyValue, kern.TypeName(keyValue)) } case *kern.DictType: - err = 
collection.SetItem(keyValue, value) + collection.SetItem(keyValue, value) default: err = collectionTerm.Errorf("collection expected") } diff --git a/operator-dot.go b/operator-dot.go index bcbf1ac..b87d6ad 100644 --- a/operator-dot.go +++ b/operator-dot.go @@ -45,15 +45,20 @@ func evalDot(ctx kern.ExprContext, opTerm *term) (v any, err error) { err = indexTerm.tk.ErrorExpectedGot("identifier") } case *kern.DictType: + var ok bool s := opTerm.children[1].symbol() if s == SymVariable || s == SymString { src := opTerm.children[1].Source() if len(src) > 1 && src[0] == '"' && src[len(src)-1] == '"' { src = src[1 : len(src)-1] } - v, err = unboxedValue.GetItem(src) + if v, ok = unboxedValue.GetItem(src); !ok { + err = opTerm.errKeyNotFound(src) + } } else if rightValue, err = opTerm.children[1].Compute(ctx); err == nil { - v, err = unboxedValue.GetItem(rightValue) + if v, ok = unboxedValue.GetItem(rightValue); !ok { + err = opTerm.errKeyNotFound(rightValue) + } } default: if rightValue, err = opTerm.children[1].Compute(ctx); err == nil { diff --git a/operator-groupby.go b/operator-groupby.go new file mode 100644 index 0000000..3537b05 --- /dev/null +++ b/operator-groupby.go @@ -0,0 +1,105 @@ +// Copyright (c) 2024-2026 Celestino Amoroso (celestino.amoroso@gmail.com). +// All rights reserved. 
+ +// operator-groupby.go +package expr + +import ( + "fmt" + "io" + "strconv" + + "git.portale-stac.it/go-pkg/expr/kern" +) + +//-------- group by term + +func newGroupByTerm(tk *Token) (inst *term) { + return &term{ + tk: *tk, + children: make([]*term, 0, 2), + position: posInfix, + priority: priIterOp, + evalFunc: evalGroupBy, + } +} + +func evalGroupBy(ctx kern.ExprContext, opTerm *term) (v any, err error) { + var leftValue, rightValue any + var it kern.Iterator + var item any + var sKey string + var keyByIndex bool + + if err = opTerm.checkOperands(); err != nil { + return + } + + if leftValue, err = opTerm.children[0].Compute(ctx); err != nil { + return + } + + if it, err = NewIterator(leftValue); err != nil { + return nil, fmt.Errorf("left operand of GROUPBY must be an iterable data-source; got %s", kern.TypeName(leftValue)) + } + + rightTk := opTerm.children[1].tk + if rightTk.IsSymbol(SymVariable) && rightTk.source == "__" { + keyByIndex = true + } else if rightValue, err = opTerm.children[1].Compute(ctx); err != nil { + return + } else if kern.IsString(rightValue) { + sKey = rightValue.(string) + } else { + return nil, fmt.Errorf("right operand of GROUPBY must be a string or identifier '__'; got %s", kern.TypeName(rightValue)) + } + + values := kern.MakeDict() + for item, err = it.Next(); err == nil; item, err = it.Next() { + ctx.SetVar("_", item) + ctx.SetVar("__", it.Index()) + ctx.SetVar("_#", it.Count()) + + var sItemKey string + + if d, ok := item.(*kern.DictType); ok { + if keyByIndex || len(sKey) == 0 { + sItemKey = strconv.Itoa(int(it.Index())) + } else if d.HasKey(sKey) { + if keyValue, exists := d.GetItem(sKey); exists { + sItemKey = fmt.Sprintf("%v", keyValue) + } else { + sItemKey = "_" + } + } else { + sItemKey = "_" + } + } else { + sItemKey = strconv.Itoa(int(it.Index())) + } + + var ls *kern.ListType + if lsAny, exists := values.GetItem(sItemKey); exists && lsAny != nil { + ls = lsAny.(*kern.ListType) + } + if ls == nil { + ls = 
kern.NewListA() + } + ls.AppendItem(item) + values.SetItem(sItemKey, ls) + + ctx.DeleteVar("_#") + ctx.DeleteVar("__") + ctx.DeleteVar("_") + } + if err == io.EOF { + err = nil + } + v = values + return +} + +// init +func init() { + registerTermConstructor(SymKwGroupBy, newGroupByTerm) +} diff --git a/symbol-map.go b/symbol-map.go index 848c112..bb95e67 100644 --- a/symbol-map.go +++ b/symbol-map.go @@ -139,6 +139,7 @@ func init() { SymKwFilter: {"filter", symClassOperator, posInfix}, SymKwDigest: {"digest", symClassOperator, posInfix}, SymKwJoin: {"join", symClassOperator, posInfix}, + SymKwGroupBy: {"groupby", symClassOperator, posInfix}, SymKwFunc: {"func(", symClassDeclaration, posPrefix}, SymKwBuiltin: {"builtin", symClassOperator, posPrefix}, SymKwPlugin: {"plugin", symClassOperator, posPrefix}, diff --git a/symbol.go b/symbol.go index d1bf178..5f5ab99 100644 --- a/symbol.go +++ b/symbol.go @@ -122,6 +122,7 @@ const ( SymKwMap SymKwFilter SymKwDigest + SymKwGroupBy SymKwJoin SymKwNil SymKwUnset @@ -147,5 +148,6 @@ func init() { "UNSET": SymKwUnset, "DIGEST": SymKwDigest, "JOIN": SymKwJoin, + "GROUPBY": SymKwGroupBy, } } diff --git a/t_common_test.go b/t_common_test.go index 3cf9a42..4d74b6f 100644 --- a/t_common_test.go +++ b/t_common_test.go @@ -6,7 +6,6 @@ package expr import ( "errors" - "reflect" "strings" "testing" @@ -74,7 +73,6 @@ func doTest(t *testing.T, ctx kern.ExprContext, section string, input *inputType var ast Expr var gotResult any var gotErr error - var eq, eqDone bool wantErr := getWantedError(input) @@ -93,18 +91,24 @@ func doTest(t *testing.T, ctx kern.ExprContext, section string, input *inputType gotResult, gotErr = ast.Eval(ctx) } - if input.wantResult != nil && gotResult != nil { - if ls1, ok := input.wantResult.(*kern.ListType); ok { - if ls2, ok := gotResult.(*kern.ListType); ok { - eq = ls1.Equals(*ls2) - eqDone = true - } - } - } + eq := kern.Equal(gotResult, input.wantResult) + // if input.wantResult != nil && gotResult != nil { + 
// if ls1, ok := input.wantResult.(*kern.ListType); ok { + // if ls2, ok := gotResult.(*kern.ListType); ok { + // eq = ls1.Equals(*ls2) + // eqDone = true + // } + // } else if dict1, ok := input.wantResult.(*kern.DictType); ok { + // if dict2, ok := gotResult.(*kern.DictType); ok { + // eq = dict1.Equals(*dict2) + // eqDone = true + // } + // } + // } - if !eqDone { - eq = reflect.DeepEqual(gotResult, input.wantResult) - } + // if !eqDone { + // eq = reflect.DeepEqual(gotResult, input.wantResult) + // } if !eq /*gotResult != input.wantResult*/ { t.Errorf(">>>%s/%d: `%s` -> result = %v [%s], want = %v [%s]", section, count, input.source, gotResult, kern.TypeName(gotResult), input.wantResult, kern.TypeName(input.wantResult)) diff --git a/t_fractions_test.go b/t_fractions_test.go index 7a79369..fe6ba81 100644 --- a/t_fractions_test.go +++ b/t_fractions_test.go @@ -38,8 +38,9 @@ func TestFractionsParser(t *testing.T) { /* 23 */ {`1+1:2+0.5`, float64(2), nil}, /* 24 */ {`1:(2-2)`, nil, `[1:3] division by zero`}, /* 25 */ {`[0,1][1-1]:1`, kern.NewFraction(0, 1), nil}, + /* 26 */ {`1:2 == 0.5`, true, nil}, } - // runTestSuiteSpec(t, section, inputs, 25) + // runTestSuiteSpec(t, section, inputs, 26) runTestSuite(t, section, inputs) } diff --git a/t_operator_test.go b/t_operator_test.go index d1ed6ea..577e4d6 100644 --- a/t_operator_test.go +++ b/t_operator_test.go @@ -35,16 +35,84 @@ func TestOperator(t *testing.T) { /* 20 */ {`a=1; a^=2`, int64(3), nil}, /* 21 */ {`a=1; ++a`, int64(2), nil}, /* 22 */ {`a=1; --a`, int64(0), nil}, - /* 23 */ {`[1,2,3] map var("_")`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil}, - /* 24 */ {`[1,2,3] map $_`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil}, - /* 25 */ {`[1,2,3,4] filter ($_ % 2 == 0)`, kern.NewList([]any{int64(2), int64(4)}), nil}, - /* 26 */ {`max=0; [2,3,1] digest max=(($_ > max) ? 
{$_} :: {max})`, int64(3), nil}, - /* 27 */ {`["a","b"] join ["x"]`, kern.NewList([]any{"a", "b", "x"}), nil}, - /* 28 */ {`["a","b"] join ["x"-true]`, nil, `[1:21] left operand 'x' [string] and right operand 'true' [bool] are not compatible with operator "-"`}, } // t.Setenv("EXPR_PATH", ".") - // runTestSuiteSpec(t, section, inputs, 28) + // runTestSuiteSpec(t, section, inputs, 22) runTestSuite(t, section, inputs) } + +func TestOperatorMap(t *testing.T) { + section := "Operator-Map" + inputs := []inputType{ + /* 1 */ {`a=1; --a`, int64(0), nil}, + /* 2 */ {`[1,2,3] map var("_")`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil}, + /* 3 */ {`[1,2,3] map $_`, kern.NewList([]any{int64(1), int64(2), int64(3)}), nil}, + } + + // runTestSuiteSpec(t, section, inputs, 3) + runTestSuite(t, section, inputs) +} + +func TestOperatorFilter(t *testing.T) { + section := "Operator-Filter" + inputs := []inputType{ + /* 1 */ {`[1,2,3,4] filter ($_ % 2 == 0)`, kern.NewList([]any{int64(2), int64(4)}), nil}, + } + + // runTestSuiteSpec(t, section, inputs, 1) + runTestSuite(t, section, inputs) +} + +func TestOperatorDigest(t *testing.T) { + section := "Operator-Digest" + inputs := []inputType{ + /* 1 */ {`max=0; [2,3,1] digest max=(($_ > max) ? 
{$_} :: {max})`, int64(3), nil}, + } + + // runTestSuiteSpec(t, section, inputs, 1) + runTestSuite(t, section, inputs) +} + +func TestOperatorJoin(t *testing.T) { + section := "Operator-Join" + inputs := []inputType{ + /* 1 */ {`["a","b"] join ["x"]`, kern.NewList([]any{"a", "b", "x"}), nil}, + /* 2 */ {`["a","b"] join ["x"-true]`, nil, `[1:21] left operand 'x' [string] and right operand 'true' [bool] are not compatible with operator "-"`}, + } + + // runTestSuiteSpec(t, section, inputs, 2) + runTestSuite(t, section, inputs) +} + +func TestOperatorGroupBy(t *testing.T) { + section := "Operator-GroupBy" + inputs := []inputType{ + /* 1 */ {`L=[{"num": 1, "alpha": "one"}, {"num": 2, "alpha": "two"}, {"num": 3, "alpha": "three"}]; L groupby "num"`, + kern.NewDict(map[any]any{ + "1": kern.NewListA(kern.NewDict(map[any]any{"num": int64(1), "alpha": "one"})), + "2": kern.NewListA(kern.NewDict(map[any]any{"num": int64(2), "alpha": "two"})), + "3": kern.NewListA(kern.NewDict(map[any]any{"num": int64(3), "alpha": "three"})), + }), + nil}, + /* 2 */ {`cars = [{"model": "compas", "vendor": "jeep"}, {"model": "limited", "vendor": "jeep"}, {"model": "600", "vendor":"fiat"}]; cars groupby "vendor"`, + kern.NewDict(map[any]any{ + "jeep": kern.NewListA( + kern.NewDict(map[any]any{"model": "compas", "vendor": "jeep"}), + kern.NewDict(map[any]any{"model": "limited", "vendor": "jeep"})), + "fiat": kern.NewListA(kern.NewDict(map[any]any{"model": "600", "vendor": "fiat"})), + }), + nil}, + /* 3 */ {`[3,4,5] groupby $__`, + kern.NewDict(map[any]any{ + "0": kern.NewListA(int64(3)), + "1": kern.NewListA(int64(4)), + "2": kern.NewListA(int64(5)), + }), + nil}, + } + + // runTestSuiteSpec(t, section, inputs, 3) + runTestSuite(t, section, inputs) +} diff --git a/term.go b/term.go index e345bfc..94860d5 100644 --- a/term.go +++ b/term.go @@ -235,6 +235,10 @@ func (t *term) errDivisionByZero() error { return t.tk.Errorf("division by zero") } +func (t *term) errKeyNotFound(key any) error { + 
return t.tk.Errorf("key '%v' not found", key) +} + func (t *term) Errorf(template string, args ...any) (err error) { err = t.tk.Errorf(template, args...) return