From 45db94c4733f58c9e018c4e6ee9e055a30e1628c Mon Sep 17 00:00:00 2001 From: tanyanliang110 <3190101362@zju.edu.cn> Date: Thu, 5 Dec 2024 19:45:56 +0800 Subject: [PATCH 1/6] Optimize the slice_string function --- pipeline/ptinput/funcs/fn_slice_string.go | 62 +++++++++++++++---- .../ptinput/funcs/fn_slice_string_test.go | 38 ++++++------ pipeline/ptinput/funcs/md/slice_string.en.md | 21 +++++-- pipeline/ptinput/funcs/md/slice_string.md | 21 +++++-- 4 files changed, 103 insertions(+), 39 deletions(-) diff --git a/pipeline/ptinput/funcs/fn_slice_string.go b/pipeline/ptinput/funcs/fn_slice_string.go index 844caf87..683fdaee 100644 --- a/pipeline/ptinput/funcs/fn_slice_string.go +++ b/pipeline/ptinput/funcs/fn_slice_string.go @@ -3,6 +3,8 @@ package funcs import ( _ "embed" + "math" + "github.com/GuanceCloud/platypus/pkg/ast" "github.com/GuanceCloud/platypus/pkg/engine/runtime" "github.com/GuanceCloud/platypus/pkg/errchain" @@ -31,8 +33,20 @@ var ( Type: []ast.DType{ast.Int}, }, { - Name: "end", - Type: []ast.DType{ast.Int}, + Name: "end", + Type: []ast.DType{ast.Int}, + Optional: true, + DefaultVal: func() (any, ast.DType) { + return int64(math.MaxInt64), ast.Int + }, + }, + { + Name: "step", + Type: []ast.DType{ast.Int}, + Optional: true, + DefaultVal: func() (any, ast.DType) { + return int64(1), ast.Int + }, }, }, []ast.DType{ast.String}, @@ -53,29 +67,53 @@ var ( ) func sliceString(ctx *runtime.Task, funcExpr *ast.CallExpr, vals ...any) *errchain.PlError { - errstring := "" - if len(vals) != 3 { - ctx.Regs.ReturnAppend(errstring, ast.String) + if len(vals) < 2 || len(vals) > 4 { + ctx.Regs.ReturnAppend("", ast.String) return nil } name := vals[0].(string) + length := int64(len(name)) start, ok := vals[1].(int64) if !ok { - ctx.Regs.ReturnAppend(errstring, ast.String) + ctx.Regs.ReturnAppend("", ast.String) return nil } end, ok := vals[2].(int64) if !ok { - ctx.Regs.ReturnAppend(errstring, ast.String) + ctx.Regs.ReturnAppend("", ast.String) return nil } - if start < 0 || end > int64(len(name)) || start > end { - ctx.Regs.ReturnAppend(errstring, ast.String) + step, ok := vals[3].(int64) + if !ok || step == 0 { + ctx.Regs.ReturnAppend("", ast.String) return nil } - substring := name[start:end] + if start < 0 { + start = int64(len(name)) + start + } + if end < 0 { + end = int64(len(name)) + end + } - ctx.Regs.ReturnAppend(substring, ast.String) - return nil + substring := "" + if step > 0 { + if start < 0 { + start = 0 + } + for i := start; i < length && i < end; i += step { + substring += string(name[i]) + } + ctx.Regs.ReturnAppend(substring, ast.String) + return nil + } else { + if start > length-1 { + start = length - 1 + } + for i := start; i > end && i >= 0; i += step { + substring += string(name[i]) + } + ctx.Regs.ReturnAppend(substring, ast.String) + return nil + } } diff --git a/pipeline/ptinput/funcs/fn_slice_string_test.go b/pipeline/ptinput/funcs/fn_slice_string_test.go index 32969a4f..2775f5b2 100644 --- a/pipeline/ptinput/funcs/fn_slice_string_test.go +++ b/pipeline/ptinput/funcs/fn_slice_string_test.go @@ -60,7 +60,7 @@ func TestSliceString(t *testing.T) { fail: false, }, { - name: "out of range1", + name: "normal4", pl: ` substring = slice_string("abcdefghijklmnop",-1,10) pt_kvs_set("result", substring) @@ -70,49 +70,49 @@ func TestSliceString(t *testing.T) { fail: false, }, { - name: "out of range2", + name: "normal5", pl: ` substring = slice_string("abcdefghijklmnop",0,100) pt_kvs_set("result", substring) `, keyName: "result", - expect: "", + expect: "abcdefghijklmnop", fail: false, }, { - name: "not integer1", + name: "normal6", pl: ` - substring = slice_string("abcdefghijklmnop","a","b") + substring = slice_string("abcdefghijklmnop",0,1,2) pt_kvs_set("result", substring) `, keyName: "result", - expect: "", - fail: true, + expect: "a", + fail: false, }, { - name: "not integer2", + name: "normal7", pl: ` - substring = slice_string("abcdefghijklmnop","abc","def") + substring = slice_string("abcdefghijklmnop",0,-1,2) pt_kvs_set("result", substring) `, keyName: "result", - expect: "", - fail: true, + expect: "acegikmo", + fail: false, }, { - name: "not string", + name: "normal8", pl: ` - substring = slice_string(12345,0,3) + substring = slice_string("15384073392", 9, 0, -2) pt_kvs_set("result", substring) `, keyName: "result", - expect: "", - fail: true, + expect: "93085", + fail: false, }, { - name: "not correct args", + name: "not integer", pl: ` - substring = slice_string("abcdefghijklmnop",0) + substring = slice_string("abcdefghijklmnop","a","b") pt_kvs_set("result", substring) `, keyName: "result", @@ -120,9 +120,9 @@ func TestSliceString(t *testing.T) { fail: true, }, { - name: "not correct args", + name: "not string", pl: ` - substring = slice_string("abcdefghijklmnop",0,1,2) + substring = slice_string(12345,0,3) pt_kvs_set("result", substring) `, keyName: "result", diff --git a/pipeline/ptinput/funcs/md/slice_string.en.md b/pipeline/ptinput/funcs/md/slice_string.en.md index fa2e87d6..929e91e5 100644 --- a/pipeline/ptinput/funcs/md/slice_string.en.md +++ b/pipeline/ptinput/funcs/md/slice_string.en.md @@ -1,18 +1,31 @@ ### `slice_string()` {#fn_slice_string} -Function prototype: `fn slice_string(name: str, start: int, end: int) -> str` +Function prototype: `fn slice_string(name: str, start: int, end: int, step: int = 1) -> str` -Function description: Returns the substring of the string from index start to end. +Function description: Returns the substring of the string from the index start to end, supporting negative indices and automatic range adjustment, and allowing the specification of a step. Function Parameters: - `name`: The string to be sliced - `start`: The starting index of the substring (inclusive) - `end`: The ending index of the substring (exclusive) +- `step`: The step, optional parameter, default is 1, supports negative steps Example: ```python -substring = slice_string("15384073392", 0, 3) -# substring will be "153" +substring = slice_string("15384073392", 0, 3) +# substring is "153" +substring2 = slice_string("15384073392", 0, 100) +# substring2 is "15384073392" +# If `start` or `end` exceeds the range of the string, the function will automatically adjust to the boundaries of the string. +substring3 = slice_string("15384073392", -5, -1) +# substring3 is "7339" +# Negative indices indicate counting from the end of the string. +substring4 = slice_string("15384073392", 0, -1, 2) +# substring4 is "13473" +substring5 = slice_string("15384073392", 9, 0, -2) +# substring5 is "93085" +# If `step` is positive, it slices from `start` to `end` with the specified step. +# If `step` is negative, it slices from `start` to `end` in reverse order with the specified step. ``` \ No newline at end of file diff --git a/pipeline/ptinput/funcs/md/slice_string.md b/pipeline/ptinput/funcs/md/slice_string.md index 967fe174..267ddcca 100644 --- a/pipeline/ptinput/funcs/md/slice_string.md +++ b/pipeline/ptinput/funcs/md/slice_string.md @@ -1,18 +1,31 @@ ### `slice_string()` {#fn_slice_string} -函数原型:`fn slice_string(name: str, start: int, end: int) -> str` +函数原型:`fn slice_string(name: str, start: int, end: int, step: int = 1) -> str` -函数说明:返回字符串从索引 start 到 end 的子字符串。 +函数说明:返回字符串从索引 start 到 end 的子字符串,支持负数索引和自动调整范围,并且可以指定步长。 函数参数: - `name`: 要截取的字符串 - `start`: 子字符串的起始索引(包含) - `end`: 子字符串的结束索引(不包含) +- `step`: 步长,可选参数,默认为 1,支持负数步长 示例: ```python -substring = slice_string("15384073392", 0, 3) -# substring 的值为 "153" +substring = slice_string("15384073392", 0, 3) +# substring 的值为 "153" +substring2 = slice_string("15384073392", 0, 100) +# substring2 的值为 "15384073392" +# 如果 start 或 end 超出字符串的范围,函数会自动调整到字符串的边界。 +substring3 = slice_string("15384073392", -5, -1) +# substring3 的值为 "7339" +# 负数索引表示从字符串末尾开始计算。 +substring4 = slice_string("15384073392", 0, -1, 2) +# substring4 的值为 "13473" +substring5 = slice_string("15384073392", 9, 0, -2) +# substring5 的值为 "93085" +# 如果 step 为正数,则从 start 到 end 按步长截取。 +# 如果 step 为负数,则从 start 到 end 按步长反向截取。 ``` \ No newline at end of file From 1c96b6c1e06fbdab57a51677f42bb713f1a1c821 Mon Sep 17 00:00:00 2001 From: tanyanliang110 <3190101362@zju.edu.cn> Date: Thu, 5 Dec 2024 20:01:38 +0800 Subject: [PATCH 2/6] Optimize the slice_string function --- pipeline/ptinput/funcs/fn_slice_string.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pipeline/ptinput/funcs/fn_slice_string.go b/pipeline/ptinput/funcs/fn_slice_string.go index 683fdaee..a3928021 100644 --- a/pipeline/ptinput/funcs/fn_slice_string.go +++ b/pipeline/ptinput/funcs/fn_slice_string.go @@ -3,8 +3,6 @@ package funcs import ( _ "embed" - "math" - "github.com/GuanceCloud/platypus/pkg/ast" "github.com/GuanceCloud/platypus/pkg/engine/runtime" "github.com/GuanceCloud/platypus/pkg/errchain" @@ -33,12 +31,8 @@ var ( Type: []ast.DType{ast.Int}, }, { - Name: "end", - Type: []ast.DType{ast.Int}, - Optional: true, - DefaultVal: func() (any, ast.DType) { - return int64(math.MaxInt64), ast.Int - }, + Name: "end", + Type: []ast.DType{ast.Int}, }, { Name: "step", From 1749227d3c5760e4e841ee2dcc187ac14d896676 Mon Sep 17 00:00:00 2001 From: tanyanliang110 <3190101362@zju.edu.cn> Date: Tue, 7 Jan 2025 10:52:43 +0800 Subject: [PATCH 3/6] debug --- pipeline/ptinput/funcs/fn_pt_kvs_test.go | 9 +++++++ pipeline/ptinput/funcs/utils_fn.go | 34 ++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/pipeline/ptinput/funcs/fn_pt_kvs_test.go b/pipeline/ptinput/funcs/fn_pt_kvs_test.go index 0275e1c1..180c3604 100644 --- a/pipeline/ptinput/funcs/fn_pt_kvs_test.go +++ b/pipeline/ptinput/funcs/fn_pt_kvs_test.go @@ -165,6 +165,15 @@ func TestPtKvsSet(t *testing.T) { expect interface{} fail bool }{ + { + name: "key1", + pl: ` + if "real_call_time_float" in pt_kvs_keys(fields=false,tags=true) { + } + `, + keyName: "key2", + expect: nil, + }, { name: "set1", pl: ` diff --git a/pipeline/ptinput/funcs/utils_fn.go b/pipeline/ptinput/funcs/utils_fn.go index 06b00ef8..dfe99182 100644 --- a/pipeline/ptinput/funcs/utils_fn.go +++ b/pipeline/ptinput/funcs/utils_fn.go @@ -37,10 +37,20 @@ func WrapFnCall(fn FnCall, paramDesc []*Param) runtime.FuncCall { // Note that some functions do not take the value of the variable // corresponding to the parameter, but its name. - vals := make([]any, len(funcExpr.Param)) + var vals []any lenP := len(paramDesc) varP := false + lenF := len(funcExpr.Param) + paramMap := make(map[string]any, lenP) + + if lenF < lenP { + vals = make([]any, lenP) + + } else { + vals = make([]any, lenF) + } + if lenP > 0 { if paramDesc[lenP-1].VariableP { lenP -= 1 @@ -48,7 +58,27 @@ func WrapFnCall(fn FnCall, paramDesc []*Param) runtime.FuncCall { } for i := 0; i < lenP; i++ { - if val, err := getParam(ctx, paramDesc[i], funcExpr.Param[i]); err != nil { + if i < lenF { + input := funcExpr.Param[i] + if val, err := getParam(ctx, paramDesc[i], input); err != nil { + return err + } else if input != nil && input.NodeType == ast.TypeAssignmentExpr { + paramMap[input.AssignmentExpr().LHS.Identifier().String()] = val + } else { + paramMap[paramDesc[i].Name] = val + } + } else { + if val, err := getParam(ctx, paramDesc[i], nil); err != nil { + return err + } else if _, exist := paramMap[paramDesc[i].Name]; !exist { + paramMap[paramDesc[i].Name] = val + } + } + } + for i := 0; i < lenP; i++ { + if param, exist := paramMap[paramDesc[i].Name]; exist { + vals[i] = param + } else if val, err := getParam(ctx, paramDesc[i], nil); err != nil { return err } else { vals[i] = val From 60b667006f11f7cb5f888051583278bb0e4d6a5d Mon Sep 17 00:00:00 2001 From: tanyanliang110 <3190101362@zju.edu.cn> Date: Tue, 7 Jan 2025 11:04:39 +0800 Subject: [PATCH 4/6] debug --- pipeline/ptinput/funcs/fn_slice_string.go | 52 ++++--------------- .../ptinput/funcs/fn_slice_string_test.go | 38 +++++++------- pipeline/ptinput/funcs/md/slice_string.en.md | 31 ++++------- pipeline/ptinput/funcs/md/slice_string.md | 21 ++------ 4 files changed, 42 insertions(+), 100 deletions(-) diff --git a/pipeline/ptinput/funcs/fn_slice_string.go b/pipeline/ptinput/funcs/fn_slice_string.go index a3928021..844caf87 100644 --- a/pipeline/ptinput/funcs/fn_slice_string.go +++ b/pipeline/ptinput/funcs/fn_slice_string.go @@ -34,14 +34,6 @@ var ( Name: "end", Type: []ast.DType{ast.Int}, }, - { - Name: "step", - Type: []ast.DType{ast.Int}, - Optional: true, - DefaultVal: func() (any, ast.DType) { - return int64(1), ast.Int - }, - }, }, []ast.DType{ast.String}, [2]*PLDoc{ @@ -61,53 +53,29 @@ var ( ) func sliceString(ctx *runtime.Task, funcExpr *ast.CallExpr, vals ...any) *errchain.PlError { - if len(vals) < 2 || len(vals) > 4 { - ctx.Regs.ReturnAppend("", ast.String) + errstring := "" + if len(vals) != 3 { + ctx.Regs.ReturnAppend(errstring, ast.String) return nil } name := vals[0].(string) - length := int64(len(name)) start, ok := vals[1].(int64) if !ok { - ctx.Regs.ReturnAppend("", ast.String) + ctx.Regs.ReturnAppend(errstring, ast.String) return nil } end, ok := vals[2].(int64) if !ok { - ctx.Regs.ReturnAppend("", ast.String) + ctx.Regs.ReturnAppend(errstring, ast.String) return nil } - step, ok := vals[3].(int64) - if !ok || step == 0 { - ctx.Regs.ReturnAppend("", ast.String) + if start < 0 || end > int64(len(name)) || start > end { + ctx.Regs.ReturnAppend(errstring, ast.String) return nil } - if start < 0 { - start = int64(len(name)) + start - } - if end < 0 { - end = int64(len(name)) + end - } + substring := name[start:end] - substring := "" - if step > 0 { - if start < 0 { - start = 0 - } - for i := start; i < length && i < end; i += step { - substring += string(name[i]) - } - ctx.Regs.ReturnAppend(substring, ast.String) - return nil - } else { - if start > length-1 { - start = length - 1 - } - for i := start; i > end && i >= 0; i += step { - substring += string(name[i]) - } - ctx.Regs.ReturnAppend(substring, ast.String) - return nil - } + ctx.Regs.ReturnAppend(substring, ast.String) + return nil } diff --git a/pipeline/ptinput/funcs/fn_slice_string_test.go b/pipeline/ptinput/funcs/fn_slice_string_test.go index 2775f5b2..32969a4f 100644 --- a/pipeline/ptinput/funcs/fn_slice_string_test.go +++ b/pipeline/ptinput/funcs/fn_slice_string_test.go @@ -60,7 +60,7 @@ func TestSliceString(t *testing.T) { fail: false, }, { - name: "normal4", + name: "out of range1", pl: ` substring = slice_string("abcdefghijklmnop",-1,10) pt_kvs_set("result", substring) @@ -70,49 +70,49 @@ func TestSliceString(t *testing.T) { fail: false, }, { - name: "normal5", + name: "out of range2", pl: ` substring = slice_string("abcdefghijklmnop",0,100) pt_kvs_set("result", substring) `, keyName: "result", - expect: "abcdefghijklmnop", + expect: "", fail: false, }, { - name: "normal6", + name: "not integer1", pl: ` - substring = slice_string("abcdefghijklmnop",0,1,2) + substring = slice_string("abcdefghijklmnop","a","b") pt_kvs_set("result", substring) `, keyName: "result", - expect: "a", - fail: false, + expect: "", + fail: true, }, { - name: "normal7", + name: "not integer2", pl: ` - substring = slice_string("abcdefghijklmnop",0,-1,2) + substring = slice_string("abcdefghijklmnop","abc","def") pt_kvs_set("result", substring) `, keyName: "result", - expect: "acegikmo", - fail: false, + expect: "", + fail: true, }, { - name: "normal8", + name: "not string", pl: ` - substring = slice_string("15384073392", 9, 0, -2) + substring = slice_string(12345,0,3) pt_kvs_set("result", substring) `, keyName: "result", - expect: "93085", - fail: false, + expect: "", + fail: true, }, { - name: "not integer", + name: "not correct args", pl: ` - substring = slice_string("abcdefghijklmnop","a","b") + substring = slice_string("abcdefghijklmnop",0) pt_kvs_set("result", substring) `, keyName: "result", @@ -120,9 +120,9 @@ func TestSliceString(t *testing.T) { fail: true, }, { - name: "not string", + name: "not correct args", pl: ` - substring = slice_string(12345,0,3) + substring = slice_string("abcdefghijklmnop",0,1,2) pt_kvs_set("result", substring) `, keyName: "result", diff --git a/pipeline/ptinput/funcs/md/slice_string.en.md b/pipeline/ptinput/funcs/md/slice_string.en.md index 929e91e5..967fe174 100644 --- a/pipeline/ptinput/funcs/md/slice_string.en.md +++ b/pipeline/ptinput/funcs/md/slice_string.en.md @@ -1,31 +1,18 @@ ### `slice_string()` {#fn_slice_string} -Function prototype: `fn slice_string(name: str, start: int, end: int, step: int = 1) -> str` +函数原型:`fn slice_string(name: str, start: int, end: int) -> str` -Function description: Returns the substring of the string from the index start to end, supporting negative indices and automatic range adjustment, and allowing the specification of a step. +函数说明:返回字符串从索引 start 到 end 的子字符串。 -Function Parameters: +函数参数: -- `name`: The string to be sliced -- `start`: The starting index of the substring (inclusive) -- `end`: The ending index of the substring (exclusive) -- `step`: The step, optional parameter, default is 1, supports negative steps +- `name`: 要截取的字符串 +- `start`: 子字符串的起始索引(包含) +- `end`: 子字符串的结束索引(不包含) -Example: +示例: ```python -substring = slice_string("15384073392", 0, 3) -# substring is "153" -substring2 = slice_string("15384073392", 0, 100) -# substring2 is "15384073392" -# If `start` or `end` exceeds the range of the string, the function will automatically adjust to the boundaries of the string. -substring3 = slice_string("15384073392", -5, -1) -# substring3 is "7339" -# Negative indices indicate counting from the end of the string. -substring4 = slice_string("15384073392", 0, -1, 2) -# substring4 is "13473" -substring5 = slice_string("15384073392", 9, 0, -2) -# substring5 is "93085" -# If `step` is positive, it slices from `start` to `end` with the specified step. -# If `step` is negative, it slices from `start` to `end` in reverse order with the specified step. +substring = slice_string("15384073392", 0, 3) +# substring 的值为 "153" ``` \ No newline at end of file diff --git a/pipeline/ptinput/funcs/md/slice_string.md b/pipeline/ptinput/funcs/md/slice_string.md index 267ddcca..967fe174 100644 --- a/pipeline/ptinput/funcs/md/slice_string.md +++ b/pipeline/ptinput/funcs/md/slice_string.md @@ -1,31 +1,18 @@ ### `slice_string()` {#fn_slice_string} -函数原型:`fn slice_string(name: str, start: int, end: int, step: int = 1) -> str` +函数原型:`fn slice_string(name: str, start: int, end: int) -> str` -函数说明:返回字符串从索引 start 到 end 的子字符串,支持负数索引和自动调整范围,并且可以指定步长。 +函数说明:返回字符串从索引 start 到 end 的子字符串。 函数参数: - `name`: 要截取的字符串 - `start`: 子字符串的起始索引(包含) - `end`: 子字符串的结束索引(不包含) -- `step`: 步长,可选参数,默认为 1,支持负数步长 示例: ```python -substring = slice_string("15384073392", 0, 3) -# substring 的值为 "153" -substring2 = slice_string("15384073392", 0, 100) -# substring2 的值为 "15384073392" -# 如果 start 或 end 超出字符串的范围,函数会自动调整到字符串的边界。 -substring3 = slice_string("15384073392", -5, -1) -# substring3 的值为 "7339" -# 负数索引表示从字符串末尾开始计算。 -substring4 = slice_string("15384073392", 0, -1, 2) -# substring4 的值为 "13473" -substring5 = slice_string("15384073392", 9, 0, -2) -# substring5 的值为 "93085" -# 如果 step 为正数,则从 start 到 end 按步长截取。 -# 如果 step 为负数,则从 start 到 end 按步长反向截取。 +substring = slice_string("15384073392", 0, 3) +# substring 的值为 "153" ``` \ No newline at end of file From c6cdcf565afa22c617235a5a4f97f123e3a597df Mon Sep 17 00:00:00 2001 From: tanyanliang110 <3190101362@zju.edu.cn> Date: Tue, 7 Jan 2025 11:06:42 +0800 Subject: [PATCH 5/6] debug --- pipeline/ptinput/funcs/md/slice_string.en.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pipeline/ptinput/funcs/md/slice_string.en.md b/pipeline/ptinput/funcs/md/slice_string.en.md index 967fe174..fa2e87d6 100644 --- a/pipeline/ptinput/funcs/md/slice_string.en.md +++ b/pipeline/ptinput/funcs/md/slice_string.en.md @@ -1,18 +1,18 @@ ### `slice_string()` {#fn_slice_string} -函数原型:`fn slice_string(name: str, start: int, end: int) -> str` +Function prototype: `fn slice_string(name: str, start: int, end: int) -> str` -函数说明:返回字符串从索引 start 到 end 的子字符串。 +Function description: Returns the substring of the string from index start to end. -函数参数: +Function Parameters: -- `name`: 要截取的字符串 -- `start`: 子字符串的起始索引(包含) -- `end`: 子字符串的结束索引(不包含) +- `name`: The string to be sliced +- `start`: The starting index of the substring (inclusive) +- `end`: The ending index of the substring (exclusive) -示例: +Example: ```python substring = slice_string("15384073392", 0, 3) -# substring 的值为 "153" +# substring will be "153" ``` \ No newline at end of file From c9a326f09030f758f16275d69f7cffef50f04d24 Mon Sep 17 00:00:00 2001 From: tanyanliang110 <3190101362@zju.edu.cn> Date: Thu, 16 Jan 2025 12:01:32 +0800 Subject: [PATCH 6/6] func kv_split --- pipeline/ptinput/funcs/fn_kv.go | 90 +++++++++++++++++++++--- pipeline/ptinput/funcs/md/kv_split.en.md | 3 +- pipeline/ptinput/funcs/md/kv_split.md | 3 +- 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/pipeline/ptinput/funcs/fn_kv.go b/pipeline/ptinput/funcs/fn_kv.go index a5451693..74d0dff5 100644 --- a/pipeline/ptinput/funcs/fn_kv.go +++ b/pipeline/ptinput/funcs/fn_kv.go @@ -62,7 +62,7 @@ func (c *reCache) get(p string) (*regexp.Regexp, bool) { func KVSplitChecking(ctx *runtime.Task, funcExpr *ast.CallExpr) *errchain.PlError { if err := normalizeFuncArgsDeprecated(funcExpr, []string{ "key", "field_split_pattern", "value_split_pattern", - "trim_key", "trim_value", "include_keys", "prefix", + "trim_key", "trim_value", "include_keys", "prefix", "value_delimiters", }, 1); err != nil { return runtime.NewRunError(ctx, err.Error(), funcExpr.NamePos) } @@ -134,6 +134,15 @@ func KVSplitChecking(ctx *runtime.Task, funcExpr *ast.CallExpr) *errchain.PlErro funcExpr.Param[6].NodeType), funcExpr.NamePos) } } + // value_Delimiters + if funcExpr.Param[7] != nil { + switch funcExpr.Param[7].NodeType { //nolint:exhaustive + case ast.TypeListLiteral, ast.TypeIdentifier: + default: + return runtime.NewRunError(ctx, fmt.Sprintf("param value_delimiters expect ListInitExpr or Identifier, got %s", + funcExpr.Param[7].NodeType), funcExpr.NamePos) + } + } return nil } @@ -253,7 +262,38 @@ func KVSplit(ctx *runtime.Task, funcExpr *ast.CallExpr) *errchain.PlError { } } - result := kvSplit(val, includeKeys, fieldSplit, valueSplit, trimKey, trimValue, prefix) + var valueDelimiters []string + if funcExpr.Param[7] != nil { + switch funcExpr.Param[7].NodeType { //nolint:exhaustive + case ast.TypeListLiteral, ast.TypeIdentifier: + v, dt, err := runtime.RunStmt(ctx, funcExpr.Param[7]) + if err != nil { + return err + } + if dt != ast.List { + break + } + switch v := v.(type) { + case []any: + for _, k := range v { + if k, ok := k.(string); ok { + valueDelimiters = append(valueDelimiters, k) + } + } + if len(valueDelimiters)%2 != 0 { + return runtime.NewRunError(ctx, fmt.Sprintf("param value_Delimiters expect even number, got %d", + len(valueDelimiters)), funcExpr.NamePos) + } + default: + } + + default: + return runtime.NewRunError(ctx, fmt.Sprintf("param value_Delimiters expect ListInitExpr or Identifier, got %s", + funcExpr.Param[7].NodeType), funcExpr.NamePos) + } + } + + result := kvSplit(val, includeKeys, fieldSplit, valueSplit, trimKey, trimValue, prefix, valueDelimiters) if len(result) == 0 { ctx.Regs.ReturnAppend(false, ast.Bool) return nil @@ -268,7 +308,7 @@ func KVSplit(ctx *runtime.Task, funcExpr *ast.CallExpr) *errchain.PlError { } func kvSplit(str string, includeKeys []string, fieldSplit, valueSplit *regexp.Regexp, - trimKey, trimValue, prefix string, + trimKey, trimValue, prefix string, valueDelimiters []string, ) map[string]string { if str == "" { return nil @@ -283,14 +323,19 @@ func kvSplit(str string, includeKeys []string, fieldSplit, valueSplit *regexp.Re } ks := map[string]struct{}{} - + vd := map[string]string{} for _, v := range includeKeys { ks[v] = struct{}{} } + for i := 0; i < len(valueDelimiters); i += 2 { + vd[valueDelimiters[i]] = valueDelimiters[i+1] + } + result := map[string]string{} fields := fieldSplit.Split(str, -1) - for _, field := range fields { + separators := fieldSplit.FindAllString(str, -1) + for i, field := range fields { keyValue := valueSplit.Split(field, 2) if len(keyValue) == 2 { @@ -307,10 +352,39 @@ func kvSplit(str string, includeKeys []string, fieldSplit, valueSplit *regexp.Re continue } } - + value := keyValue[1] + if last, ok := vd[string(value[0])]; ok { + j := i + if string(value[0]) == last { + count := strings.Count(value, last) + for count%2 != 0 { + if j+1 >= len(fields) { + break + } + value = value + separators[j] + fields[j+1] + count += strings.Count(fields[j+1], last) + j++ + } + } else { + countA, countB := strings.Count(value, string(value[0])), strings.Count(value, last) + for countA > countB { + if j+1 >= len(fields) { + break + } + value = value + separators[j] + fields[j+1] + countA += strings.Count(fields[j+1], string(value[0])) + countB += strings.Count(fields[j+1], last) + j++ + } + } + end := strings.LastIndex(value, last) + if end > 0 { + value = value[1:end] + } + } // trim value if trimValue != "" { - keyValue[1] = strings.Trim(keyValue[1], trimValue) + value = strings.Trim(value, trimValue) } // prefix + key @@ -319,7 +393,7 @@ func kvSplit(str string, includeKeys []string, fieldSplit, valueSplit *regexp.Re } // append to result - result[keyValue[0]] = keyValue[1] + result[keyValue[0]] = value } } return result diff --git a/pipeline/ptinput/funcs/md/kv_split.en.md b/pipeline/ptinput/funcs/md/kv_split.en.md index b8426e65..0eaf55ab 100644 --- a/pipeline/ptinput/funcs/md/kv_split.en.md +++ b/pipeline/ptinput/funcs/md/kv_split.en.md @@ -1,6 +1,6 @@ ### `kv_split()` {#fn-kv_split} -Function prototype: `fn kv_split(key, field_split_pattern = " ", value_split_pattern = "=", trim_key = "", trim_value = "", include_keys = [], prefix = "") -> bool` +Function prototype: `fn kv_split(key, field_split_pattern = " ", value_split_pattern = "=", trim_key = "", trim_value = "", include_keys = [], prefix = "", value_delimiters = []) -> bool` Function description: extract all key-value pairs from a string @@ -13,6 +13,7 @@ Function parameters: - `trim_key`: delete all the specified characters leading and trailing the extracted key; the default value is "" - `trim_value`: remove all leading and trailing characters from the extracted value; the default value is "" - `prefix`: add prefix to all keys +- `value_delimiters`: defines paired delimiters (e.g., ['[', ']']) for handling values wrapped by specified characters; default value is [], do not process any delimiters. Example: diff --git a/pipeline/ptinput/funcs/md/kv_split.md b/pipeline/ptinput/funcs/md/kv_split.md index c1e2e334..26e0520d 100644 --- a/pipeline/ptinput/funcs/md/kv_split.md +++ b/pipeline/ptinput/funcs/md/kv_split.md @@ -1,6 +1,6 @@ ### `kv_split()` {#fn-kv_split} -函数原型:`fn kv_split(key, field_split_pattern = " ", value_split_pattern = "=", trim_key = "", trim_value = "", include_keys = [], prefix = "") -> bool` +函数原型:`fn kv_split(key, field_split_pattern = " ", value_split_pattern = "=", trim_key = "", trim_value = "", include_keys = [], prefix = "", value_delimiters = []) -> bool` 函数说明:从字符串中提取出所有的键值对 @@ -13,6 +13,7 @@ - `trim_key`: 删除提取出的 key 的前导和尾随的所有指定的字符;默认值为 `""` - `trim_value`: 删除提取出的 value 的前导和尾随的所有指定的字符;默认值为 `""` - `prefix`: 给所有的 key 添加前缀字符串 +- `value_delimiters`: 定义成对的分隔符(如 `['[', ']']`),用于处理value被指定字符包裹的情况;默认值为 `[]`,不处理任何分隔符 示例: