Skip to content

Commit 837515d

Browse files
GregoryTravisjdunkerley
authored andcommitted
Text_Column.mid for in-memory and database backends (#14420)
(cherry picked from commit 51f07c1)
1 parent 9510099 commit 837515d

File tree

11 files changed

+219
-36
lines changed

11 files changed

+219
-36
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
- [Read files into DuckDB both spatial and not.][14367]
7777
- [Implement Text_Column to_case for DB backends][14386]
7878
- [Implement bulk loading to DuckDB][14402]
79+
- [Implement `Text_Column.text_mid` for in-memory and database backends.][14420]
7980
- [Initial file writing from DuckDB][14421]
8081

8182
[13769]: https://github.com/enso-org/enso/pull/13769
@@ -101,6 +102,7 @@
101102
[14367]: https://github.com/enso-org/enso/pull/14367
102103
[14386]: https://github.com/enso-org/enso/pull/14386
103104
[14402]: https://github.com/enso-org/enso/pull/14402
105+
[14420]: https://github.com/enso-org/enso/pull/14420
104106
[14421]: https://github.com/enso-org/enso/pull/14421
105107

106108
#### Enso Language & Runtime

distribution/lib/Standard/Database/0.0.0-dev/src/Dialects/Dialect_Flag.enso

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ type Dialect_Flag
147147
private: true
148148
---
149149
Specifies how the max size of the char type behaves after
150-
text_left/text_right.
150+
text_left, text_right, and text_mid.
151151
If True the original size is kept, if False the size is reset.
152152
Char_Max_Size_After_Substring_Kept
153153
## ---

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ base_dialect_operations =
494494
functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]]
495495
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
496496
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
497-
text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case, make_case_sensitive, length]
497+
text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case, make_case_sensitive, length, ["SUBSTR", make_function "SUBSTR"]]
498498
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
499499
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
500500
types = [simple_cast]

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/DB_Column_Implementation.enso

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import Standard.Base.Internal.Rounding_Helpers
1010
from Standard.Base.Metadata.Widget import Text_Input
1111
from Standard.Base.Widget_Helpers import make_format_chooser
1212

13+
import Standard.Table.Column.Rest_Of_String
1314
import Standard.Table.Fill_With.Fill_With
1415
import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
1516
import Standard.Table.Internal.Date_Time_Helpers
@@ -342,6 +343,17 @@ type DB_Column_Implementation
342343
new_name = (naming_helper this_column).function_name "text_right" [this_column, n]
343344
make_binary_op this_column "RIGHT" n2 new_name
344345

346+
text_mid (this_column : Column & DB_Column) (start : Column | Any) (length : Column | Any) =
    # Builds the name of the resulting column from the arguments that are
    # actually forwarded to the database.
    name_for parts = (naming_helper this_column).function_name "text_mid" parts

    Value_Type.expect_integer start <|
        case length of
            # No explicit length: emit the two-argument `SUBSTR`, which takes
            # everything from the start position onwards.
            _ : Rest_Of_String ->
                # `start` is shifted by one before being handed to SQL
                # (SQL `SUBSTR` positions are 1-based).
                make_op this_column "SUBSTR" [start + 1] (name_for [this_column, start])
            # Explicit length: it must be an integer and is forwarded as the
            # third `SUBSTR` argument.
            _ ->
                Value_Type.expect_integer length <|
                    make_op this_column "SUBSTR" [start + 1, length] (name_for [this_column, start, length])
356+
345357
contains (this_column : Column & DB_Column) (other : Column | Text | Any) case_sensitivity:Case_Sensitivity =
346358
new_name = (naming_helper this_column).function_name "contains" [this_column, other]
347359
make_text_case_op this_column "CONTAINS" other case_sensitivity new_name

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ type Postgres_Dialect
439439
---
440440
make_dialect_operations =
441441
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"], ["PROPER", Base_Generator.make_function "INITCAP"]]
442-
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace], left, right, regex_match]+concat_ops+cases+trim_ops
442+
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace], left, right, ["SUBSTR", _make_mid], regex_match]+concat_ops+cases+trim_ops
443443
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
444444
arith_extensions = [is_nan, is_inf, is_finite, floating_point_div, mod_op, decimal_div, decimal_mod, ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]]
445445
bool = [bool_or]
@@ -695,6 +695,22 @@ left = Base_Generator.lift_binary_op "LEFT" str-> n->
695695
right = Base_Generator.lift_binary_op "RIGHT" str-> n->
696696
SQL_Builder.code "right(" ++ str ++ ", CAST(" ++ n ++ " AS INT))"
697697

698+
## ---
   private: true
   ---
   Generates the Postgres SQL for the `SUBSTR` operation.

   `arguments` is either `[text, start]` or `[text, start, length]`. The
   `start` and `length` expressions are each wrapped in a cast to `integer`.
   Any other number of arguments results in an `Illegal_Argument` error.
private _make_mid arguments:Vector -> SQL_Builder =
    # Wraps an SQL expression as `(<expr>)::integer`.
    as_integer expr = SQL_Builder.code "(" ++ expr ++ ")::integer"

    case arguments.length of
        2 ->
            subject = arguments.at 0
            from_pos = as_integer (arguments.at 1)
            SQL_Builder.code "substr(" ++ subject ++ ", " ++ from_pos ++ ")"
        3 ->
            subject = arguments.at 0
            from_pos = as_integer (arguments.at 1)
            count = as_integer (arguments.at 2)
            SQL_Builder.code "substr(" ++ subject ++ ", " ++ from_pos ++ ", " ++ count ++ ")"
        _ -> Error.throw (Illegal_Argument.Error "SUBSTR requires 2 or 3 arguments.")
713+
698714
## ---
699715
private: true
700716
---

distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ operations_dict =
252252

253253
always_boolean_ops = ["==", "!=", "EQUALS_IGNORE_CASE", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "STARTS_WITH", "ENDS_WITH", "CONTAINS", "BOOL_OR", "IS_INF", "IS_FINITE"]
254254
always_floating_ops = ["/", "MOD", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
255-
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE", "LEFT", "RIGHT", "UPPER", "LOWER"]
255+
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE", "LEFT", "RIGHT", "SUBSTR", "UPPER", "LOWER"]
256256
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "COUNT_OVER_PARTITION", "ROW_NUMBER", "ROW_NUMBER_IN_GROUP", "GROUP_NUMBER", "GROUP_NUMBER_EQUAL_COUNT", "LENGTH", "SIGNUM"]
257257
same_as_first = ["TRUNCATE", "CEIL", "FLOOR", "FIRST", "LAST", "ABS"]
258258
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]

distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Dialect.enso

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ private _op_needs_to_materialize_null_checks op -> Boolean =
549549
---
550550
make_dialect_operations =
551551
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
552-
text = [starts_with, contains, ends_with, like, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace], left, right]+concat_ops+cases+trim_ops
552+
text = [starts_with, contains, ends_with, like, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace], left, right, ["SUBSTR", _make_mid]]+concat_ops+cases+trim_ops
553553
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
554554
arith_extensions = [floating_point_div, mod_op, decimal_div, decimal_mod, ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]]
555555
bool = [bool_or]
@@ -711,6 +711,22 @@ left = Base_Generator.lift_binary_op "LEFT" str-> n->
711711
right = Base_Generator.lift_binary_op "RIGHT" str-> n->
712712
SQL_Builder.code "right(" ++ str ++ ", CAST(" ++ n ++ " AS INT))"
713713

714+
## ---
   private: true
   ---
   Generates the T-SQL for the `SUBSTR` operation.

   `arguments` is either `[text, start]` or `[text, start, length]`. Any
   other number of arguments results in an `Illegal_Argument` error.

   T-SQL `SUBSTRING` only accepts an omitted `length` on recent SQL Server
   versions, so the two-argument form always emits an explicit length that
   is large enough to cover the rest of the string.
private _make_mid arguments:Vector -> SQL_Builder =
    case arguments.length of
        2 ->
            str = arguments.at 0
            start = arguments.at 1
            # `DATALENGTH` (size in bytes) is never smaller than the number
            # of characters, so it is a safe upper bound for "the rest of
            # the string". `LEN` is deliberately not used: it ignores
            # trailing spaces and would cut them off the result.
            SQL_Builder.code "substring(" ++ str ++ ", " ++ start ++ ", DATALENGTH(" ++ str ++ "))"
        3 ->
            str = arguments.at 0
            start = arguments.at 1
            length = arguments.at 2
            SQL_Builder.code "substring(" ++ str ++ ", " ++ start ++ ", " ++ length ++ ")"
        _ -> Error.throw (Illegal_Argument.Error "SUBSTRING requires 2 or 3 arguments.")
729+
714730
## ---
715731
private: true
716732
---

distribution/lib/Standard/Table/0.0.0-dev/docs/api/Column.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,11 @@
8686
- starts_with self other:(Standard.Table.Column.Column|Standard.Base.Data.Text.Text|Standard.Base.Any.Any) case_sensitivity:Standard.Base.Data.Text.Case_Sensitivity.Case_Sensitivity= -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
8787
- take self range:(Standard.Base.Data.Index_Sub_Range.Index_Sub_Range|Standard.Base.Data.Range.Range|Standard.Base.Data.Numbers.Integer)= -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
8888
- text_cleanse self remove:(Standard.Base.Data.Vector.Vector Standard.Base.Data.Text.Regex.Named_Pattern.Named_Pattern) -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
89-
- text_left self n:(Standard.Table.Column.Column|Standard.Base.Data.Numbers.Integer) -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
89+
- text_left self n:(Standard.Table.Column.Column|Standard.Base.Data.Numbers.Integer|Standard.Base.Nothing.Nothing)= -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
9090
- text_length self -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
91+
- text_mid self start:(Standard.Table.Column.Column|Standard.Base.Data.Numbers.Integer|Standard.Base.Nothing.Nothing) length:(Standard.Table.Column.Column|Standard.Base.Data.Numbers.Integer|Standard.Base.Nothing.Nothing|Standard.Table.Column.Rest_Of_String)= -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
9192
- text_replace self term:(Standard.Base.Data.Text.Text|Standard.Base.Data.Text.Regex.Regex|Standard.Table.Column.Column)= new_text:(Standard.Base.Data.Text.Text|Standard.Table.Column.Column)= case_sensitivity:Standard.Base.Data.Text.Case_Sensitivity.Case_Sensitivity= only_first:Standard.Base.Data.Boolean.Boolean= -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
92-
- text_right self n:(Standard.Table.Column.Column|Standard.Base.Data.Numbers.Integer) -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
93+
- text_right self n:(Standard.Table.Column.Column|Standard.Base.Data.Numbers.Integer|Standard.Base.Nothing.Nothing)= -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
9394
- to_js_object self -> Standard.Base.Data.Json.JS_Object
9495
- to_table self -> (Standard.Table.Table.Table|Standard.Base.Any.Any)
9596
- to_text self -> Standard.Base.Data.Text.Text
@@ -101,8 +102,9 @@
101102
- zip self right:(Standard.Table.Column.Column|Standard.Table.Table.Table)= keep_unmatched:(Standard.Base.Data.Boolean.Boolean|Standard.Base.Data.Vector.Report_Unmatched)= right_prefix:Standard.Base.Data.Text.Text= on_problems:Standard.Base.Errors.Problem_Behavior.Problem_Behavior= -> Standard.Table.Table.Table
102103
- || self other:(Standard.Table.Column.Column|Standard.Base.Any.Any) -> (Standard.Table.Column.Column&Standard.Base.Any.Any)
103104
- default_date_period column:Standard.Base.Any.Any -> Standard.Base.Any.Any
104-
- default_row_limit_for_read column:Standard.Base.Any.Any -> Standard.Base.Any.Any
105105
- Standard.Table.Column.Column.from that:Standard.Base.Data.Vector.Vector name:Standard.Base.Data.Text.Text= -> Standard.Table.Column.Column
106+
- type Rest_Of_String
107+
- default_row_limit_for_read column:Standard.Base.Any.Any -> Standard.Base.Any.Any
106108
- Standard.Base.Data.Vector.Vector.from that:Standard.Table.Column.Column -> Standard.Base.Data.Vector.Vector
107109
- Standard.Base.Data.Vector.Vector.from that:Standard.Table.Table.Table -> Standard.Base.Data.Vector.Vector
108110
- Standard.Table.Column.Column.from that:Standard.Base.Data.Range.Range name:Standard.Base.Data.Text.Text= -> Standard.Table.Column.Column

distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ type Column
12511251
example_text_length =
12521252
Examples.text_column_1.text_left 5
12531253
```
1254-
text_left self (n : Column | Integer) -> Column & Any =
1254+
text_left self (n : Column | Integer | Nothing = 1) -> Column & Any =
12551255
self.implementation.text_left self n
12561256

12571257
## ---
@@ -1271,9 +1271,30 @@ type Column
12711271
example_text_length =
12721272
Examples.text_column_1.text_right 5
12731273
```
1274-
text_right self (n : Column | Integer) -> Column & Any =
1274+
text_right self (n : Column | Integer | Nothing = 1) -> Column & Any =
12751275
self.implementation.text_right self n
12761276

1277+
## ---
   group: Standard.Base.Text
   icon: preparation
   ---
   Gets the characters starting at `start` and continuing for `length`
   characters, for each element of the column.
   In the Database backends, the default text mid method of the particular
   database is used.
   In the in-memory backend, this will give you the `length` graphemes of
   the string, starting at `start`.

   ## Arguments
   - `start`: The zero-based position of the first character to take. May
     be a single value or a column providing one value per row.
   - `length`: The number of characters to take. May be a single value or
     a column providing one value per row. Defaults to `Rest_Of_String`,
     which takes everything from `start` to the end of the text.

   ## Remarks
   In the in-memory backend, a negative `start` is treated as 0, a `start`
   past the end of the text or a negative `length` gives an empty text, and
   a `Nothing` text, `start` or `length` gives `Nothing`.

   ## Examples
   ### import Standard.Examples

   ```
   example_text_mid =
       Examples.text_column_1.text_mid 5
   ```
text_mid self (start : Column | Integer | Nothing) (length : Column | Integer | Nothing | Rest_Of_String = Rest_Of_String) -> Column & Any =
    self.implementation.text_mid self start length
1297+
12771298
## ---
12781299
group: Standard.Base.Logical
12791300
icon: preparation
@@ -2575,6 +2596,12 @@ type Column
25752596
offset self n:Integer=-1 fill_with:Fill_With=..Nothing -> Column & Any =
25762597
self.implementation.offset self n fill_with
25772598

2599+
## ---
   private: true
   ---
   Used to specify that the rest of the string is to be returned.
   It is the default value of the `length` argument of `Column.text_mid`.
type Rest_Of_String
2604+
25782605
## ---
25792606
private: true
25802607
---

distribution/lib/Standard/Table/0.0.0-dev/src/Internal/In_Memory_Column_Implementation.enso

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import Standard.Base.Internal.Rounding_Helpers
1616
from Standard.Base.Data.Index_Sub_Range import drop_helper, normalize_ranges, take_helper
1717

1818
import project.Column.Column
19+
import project.Column.Rest_Of_String
1920
import project.Constants.Previous_Value
2021
import project.Data_Formatter.Data_Formatter
2122
import project.Fill_With.Fill_With
@@ -359,16 +360,61 @@ type In_Memory_Column_Implementation
359360
Value_Type.expect_text this_column <|
360361
apply_unary_operation this_column TextLengthOperation.INSTANCE
361362

362-
text_left (this_column : Column & In_Memory_Column) (n : Column | Integer) =
363+
text_left (this_column : Column & In_Memory_Column) (n : Column | Integer | Nothing) =
363364
Value_Type.expect_text this_column <| Value_Type.expect_integer n <|
364365
new_name = naming_helper.function_name "text_left" [this_column, n]
365366
_apply_case_sensitive_text_operation this_column n ..Default TextPartOperation.LEFT (a -> b -> a.take b) new_name
366367

367-
text_right (this_column : Column & In_Memory_Column) (n : Column | Integer) =
368+
text_right (this_column : Column & In_Memory_Column) (n : Column | Integer | Nothing) =
368369
Value_Type.expect_text this_column <| Value_Type.expect_integer n <|
369370
new_name = naming_helper.function_name "text_right" [this_column, n]
370371
_apply_case_sensitive_text_operation this_column n ..Default TextPartOperation.RIGHT (a -> b -> a.take (..Last b)) new_name
371372

373+
text_mid (this_column : Column & In_Memory_Column) (start : Column | Integer | Nothing) (length : Column | Integer | Nothing | Rest_Of_String) =
    Value_Type.expect_text this_column <| Value_Type.expect_integer start <|
        result_name = naming_helper.function_name "text_mid" [this_column, start, length]

        # Substring of a single value. Gives Nothing if any input is Nothing.
        # A negative position is treated as 0; a position at or past the end
        # of the text, or a negative count, gives "".
        slice text pos count =
            has_missing = text.is_nothing || pos.is_nothing || count.is_nothing
            if has_missing then Nothing else
                begin = if pos < 0 then 0 else pos
                case count of
                    _ : Rest_Of_String ->
                        if begin >= text.length then "" else text.substring begin
                    _ : Integer ->
                        if count < 0 || begin >= text.length then "" else text.substring begin count

        # `start` is a single Integer and `count` is a single value (an
        # Integer or Rest_Of_String): map over the text column alone.
        map_rows count =
            mapped = _apply_unary_map this_column result_name (s -> slice s start count) expected_result_type=Value_Type.Char
            mapped.cast this_column.value_type

        # `start` is a column and `count` is a single value: pair every
        # text with its own start position.
        zip_with_start count =
            zipped = _apply_binary_map this_column (s -> p -> slice s p count) start result_name skip_nulls=False expected_result_type=Value_Type.Char
            zipped.cast this_column.value_type

        # A literal Nothing for either argument makes every row Nothing.
        if Nothing == start || Nothing == length then this_column . const Nothing . rename result_name else
            case start of
                _ : Integer ->
                    case length of
                        _ : Integer ->
                            map_rows length
                        _ : Rest_Of_String ->
                            map_rows length
                        _ : Column ->
                            Value_Type.expect_integer length <|
                                zipped = _apply_binary_map this_column (s -> n -> slice s start n) length result_name skip_nulls=False expected_result_type=Value_Type.Char
                                zipped.cast this_column.value_type
                _ : Column ->
                    case length of
                        _ : Integer ->
                            zip_with_start length
                        _ : Rest_Of_String ->
                            zip_with_start length
                        _ : Column ->
                            # Three columns take part, so the rows are walked
                            # by index and the result is built by hand.
                            Value_Type.expect_integer length <|
                                storage_text = this_column.java_column
                                storage_start = (start:In_Memory_Column).java_column
                                storage_length = (length:In_Memory_Column).java_column
                                total = this_column.row_count

                                out = make_string_builder total
                                0.up_to total . each row->
                                    out.append (slice (storage_text.getItem row) (storage_start.getItem row) (storage_length.getItem row))

                                In_Memory_Column.from_storage result_name out.seal . cast this_column.value_type
417+
372418
contains (this_column : Column & In_Memory_Column) (other : Column | Text | Any) case_sensitivity:Case_Sensitivity = Value_Type.expect_text this_column <| Value_Type.expect_text other <|
373419
new_name = naming_helper.function_name "contains" [this_column, other]
374420
_apply_case_sensitive_text_operation this_column other case_sensitivity TextPredicates.CONTAINS (a -> b -> a.contains b case_sensitivity) new_name

0 commit comments

Comments
 (0)