diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3e9926239672..58b5efae215d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4461,6 +4461,14 @@ MergeTreeDataPartFormat MergeTreeData::choosePartFormat(size_t bytes_uncompresse using PartType = MergeTreeDataPartType; using PartStorageType = MergeTreeDataPartStorageType; + /// The deprecated Object type doesn't work correctly with compact parts when + /// write_marks_for_substreams_in_compact_parts is disabled: the compact reader cannot + /// properly handle missing substreams in the complex nested Tuple structures that Object + /// converts to. Force wide parts for such tables to avoid deserialization crashes. + /// NOTE(review): the check below never reads that setting, so wide parts are forced even when it is enabled - confirm this is intended. + if (hasDynamicSubcolumnsDeprecated(getInMemoryMetadataPtr()->getColumns())) + return {PartType::Wide, PartStorageType::Full}; + String out_reason; const auto settings = getSettings(); if (!canUsePolymorphicParts(*settings, out_reason)) diff --git a/tests/queries/0_stateless/03779_object_in_compact_mergetree_parts.reference b/tests/queries/0_stateless/03779_object_in_compact_mergetree_parts.reference new file mode 100644 index 000000000000..53742b87fafc --- /dev/null +++ b/tests/queries/0_stateless/03779_object_in_compact_mergetree_parts.reference @@ -0,0 +1,4 @@ +1 [['aaa','ccc']] [['bbb','']] [[0,0]] [''] +2 [['','']] [['ddd','']] [[10,20]] ['foo'] +1 [['aaa','ccc']] [['bbb','']] [[0,0]] [''] +2 [['','']] [['ddd','']] [[10,20]] ['foo'] diff --git a/tests/queries/0_stateless/03779_object_in_compact_mergetree_parts.sql b/tests/queries/0_stateless/03779_object_in_compact_mergetree_parts.sql new file mode 100644 index 000000000000..06ab06abca96 --- /dev/null +++ b/tests/queries/0_stateless/03779_object_in_compact_mergetree_parts.sql @@ -0,0 +1,32 @@ +SET allow_experimental_object_type = 1; + +-- Case 1: write_marks_for_substreams_in_compact_parts is disabled (=0) +DROP TABLE IF EXISTS t_json_complex; +CREATE TABLE t_json_complex 
(id UInt32, arr Array(Object('json'))) +ENGINE = MergeTree ORDER BY id +SETTINGS write_marks_for_substreams_in_compact_parts=0; + +-- Insert two rows whose JSON objects contain nested arrays with different sets of keys +INSERT INTO t_json_complex FORMAT JSONEachRow {"id": 1, "arr": [{"k1": [{"k2": "aaa", "k3": "bbb"}, {"k2": "ccc"}]}]} + +INSERT INTO t_json_complex FORMAT JSONEachRow {"id": 2, "arr": [{"k1": [{"k3": "ddd", "k4": 10}, {"k4": 20}], "k5": {"k6": "foo"}}]} + +-- This query used to crash the server. The expected rows are in the .reference file. +SELECT id, arr.k1.k2, arr.k1.k3, arr.k1.k4, arr.k5.k6 FROM t_json_complex ORDER BY id; +DROP TABLE t_json_complex; + +-- Case 2: the same scenario with the setting explicitly enabled (=1) +DROP TABLE IF EXISTS t_json_complex_compact_parts; +CREATE TABLE t_json_complex_compact_parts (id UInt32, arr Array(Object('json'))) +ENGINE = MergeTree ORDER BY id +SETTINGS write_marks_for_substreams_in_compact_parts=1; + +-- Insert the same two rows (nested arrays with different sets of keys) as in case 1 +INSERT INTO t_json_complex_compact_parts FORMAT JSONEachRow {"id": 1, "arr": [{"k1": [{"k2": "aaa", "k3": "bbb"}, {"k2": "ccc"}]}]} + +INSERT INTO t_json_complex_compact_parts FORMAT JSONEachRow {"id": 2, "arr": [{"k1": [{"k3": "ddd", "k4": 10}, {"k4": 20}], "k5": {"k6": "foo"}}]} + +-- Same query as in case 1 (which used to crash the server). The .reference file expects identical rows here. +SELECT id, arr.k1.k2, arr.k1.k3, arr.k1.k4, arr.k5.k6 FROM t_json_complex_compact_parts ORDER BY id; +DROP TABLE t_json_complex_compact_parts; +