Sql-Server
通過 OPENJSON 優化 json 數據的提取
我正在嘗試優化從 REST API 獲得的值的提取,該 API 在數組中返回 json 值。
這是一個最小的、完整的、可驗證的例子,它準確地反映了我正在做的事情。
USE tempdb; DROP TABLE IF EXISTS dbo.json_test; CREATE TABLE dbo.json_test ( json_test_id int NOT NULL IDENTITY(1,1) , some_uniqueidentifier uniqueidentifier NULL , some_varchar varchar(100) NULL , the_json nvarchar(max) NULL ); INSERT INTO dbo.json_test (some_uniqueidentifier, some_varchar, the_json) SELECT some_uniqueidentifier = NEWID() , some_varchar = CONVERT(varchar(100), CRYPT_GEN_RANDOM(64), 1) , the_json = ( SELECT st.* FROM sys.tables st CROSS JOIN sys.tables st2 WHERE st.object_id = t.object_id FOR JSON AUTO ) FROM sys.tables t; ;WITH src AS ( SELECT jt.some_uniqueidentifier , jt.some_varchar , top_array.[key] , top_array.[value] FROM dbo.json_test jt CROSS APPLY OPENJSON(jt.the_json, N'lax $') top_array ), src2 AS ( SELECT src.some_uniqueidentifier , src.some_varchar , src.[key] , src.[value] , inner_key = inner_array.[key] , inner_value = inner_array.[value] FROM src CROSS APPLY OPENJSON(src.[value], N'lax $') inner_array ) SELECT src2.some_uniqueidentifier , src2.some_varchar , src2.[key] , [name] = MAX(CASE WHEN src2.[inner_key] = 'name' THEN src2.[inner_value] ELSE NULL END) , [object_id] = MAX(CASE WHEN src2.[inner_key] = 'object_id' THEN src2.[inner_value] ELSE NULL END) , [principal_id] = MAX(CASE WHEN src2.[inner_key] = 'principal_id' THEN src2.[inner_value] ELSE NULL END) , [schema_id] = MAX(CASE WHEN src2.[inner_key] = 'schema_id' THEN src2.[inner_value] ELSE NULL END) , [parent_object_id] = MAX(CASE WHEN src2.[inner_key] = 'parent_object_id' THEN src2.[inner_value] ELSE NULL END) , [type] = MAX(CASE WHEN src2.[inner_key] = 'type' THEN src2.[inner_value] ELSE NULL END) , [type_desc] = MAX(CASE WHEN src2.[inner_key] = 'type_desc' THEN src2.[inner_value] ELSE NULL END) , [create_date] = MAX(CASE WHEN src2.[inner_key] = 'create_date' THEN src2.[inner_value] ELSE NULL END) , [modify_date] = MAX(CASE WHEN src2.[inner_key] = 'modify_date' THEN src2.[inner_value] ELSE NULL END) , [is_ms_shipped] = MAX(CASE WHEN src2.[inner_key] = 'is_ms_shipped' THEN src2.[inner_value] ELSE NULL END) , [is_published] = MAX(CASE WHEN src2.[inner_key] = 'is_published' THEN src2.[inner_value] ELSE NULL END) , [is_schema_published] = MAX(CASE WHEN src2.[inner_key] = 'is_schema_published' THEN src2.[inner_value] ELSE NULL END) , [lob_data_space_id] = MAX(CASE WHEN src2.[inner_key] = 'lob_data_space_id' THEN src2.[inner_value] ELSE NULL END) , [filestream_data_space_id] = MAX(CASE WHEN src2.[inner_key] = 'filestream_data_space_id' THEN src2.[inner_value] ELSE NULL END) , [max_column_id_used] = MAX(CASE WHEN src2.[inner_key] = 'max_column_id_used' THEN src2.[inner_value] ELSE NULL END) , [lock_on_bulk_load] = MAX(CASE WHEN src2.[inner_key] = 'lock_on_bulk_load' THEN src2.[inner_value] ELSE NULL END) , [uses_ansi_nulls] = MAX(CASE WHEN src2.[inner_key] = 'uses_ansi_nulls' THEN src2.[inner_value] ELSE NULL END) , [is_replicated] = MAX(CASE WHEN src2.[inner_key] = 'is_replicated' THEN src2.[inner_value] ELSE NULL END) , [has_replication_filter] = MAX(CASE WHEN src2.[inner_key] = 'has_replication_filter' THEN src2.[inner_value] ELSE NULL END) , [is_merge_published] = MAX(CASE WHEN src2.[inner_key] = 'is_merge_published' THEN src2.[inner_value] ELSE NULL END) , [is_sync_tran_subscribed] = MAX(CASE WHEN src2.[inner_key] = 'is_sync_tran_subscribed' THEN src2.[inner_value] ELSE NULL END) , [has_unchecked_assembly_data] = MAX(CASE WHEN src2.[inner_key] = 'has_unchecked_assembly_data' THEN src2.[inner_value] ELSE NULL END) , [text_in_row_limit] = MAX(CASE WHEN src2.[inner_key] = 'text_in_row_limit' THEN src2.[inner_value] ELSE NULL END) , [large_value_types_out_of_row] = MAX(CASE WHEN src2.[inner_key] = 'large_value_types_out_of_row' THEN src2.[inner_value] ELSE NULL END) , [is_tracked_by_cdc] = MAX(CASE WHEN src2.[inner_key] = 'is_tracked_by_cdc' THEN src2.[inner_value] ELSE NULL END) , [lock_escalation] = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation' THEN src2.[inner_value] ELSE NULL END) , [lock_escalation_desc] = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation_desc' THEN src2.[inner_value] ELSE NULL END) , [is_filetable] = MAX(CASE WHEN src2.[inner_key] = 'is_filetable' THEN src2.[inner_value] ELSE NULL END) , [is_memory_optimized] = MAX(CASE WHEN src2.[inner_key] = 'is_memory_optimized' THEN src2.[inner_value] ELSE NULL END) , [durability] = MAX(CASE WHEN src2.[inner_key] = 'durability' THEN src2.[inner_value] ELSE NULL END) , [durability_desc] = MAX(CASE WHEN src2.[inner_key] = 'durability_desc' THEN src2.[inner_value] ELSE NULL END) , [temporal_type] = MAX(CASE WHEN src2.[inner_key] = 'temporal_type' THEN src2.[inner_value] ELSE NULL END) , [temporal_type_desc] = MAX(CASE WHEN src2.[inner_key] = 'temporal_type_desc' THEN src2.[inner_value] ELSE NULL END) , [history_table_id] = MAX(CASE WHEN src2.[inner_key] = 'history_table_id' THEN src2.[inner_value] ELSE NULL END) , [is_remote_data_archive_enabled] = MAX(CASE WHEN src2.[inner_key] = 'is_remote_data_archive_enabled' THEN src2.[inner_value] ELSE NULL END) , [is_external] = MAX(CASE WHEN src2.[inner_key] = 'is_external' THEN src2.[inner_value] ELSE NULL END) , [history_retention_period] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period' THEN src2.[inner_value] ELSE NULL END) , [history_retention_period_unit] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit' THEN src2.[inner_value] ELSE NULL END) , [history_retention_period_unit_desc] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit_desc' THEN src2.[inner_value] ELSE NULL END) , [is_node] = MAX(CASE WHEN src2.[inner_key] = 'is_node' THEN src2.[inner_value] ELSE NULL END) , [is_edge] = MAX(CASE WHEN src2.[inner_key] = 'is_edge' THEN src2.[inner_value] ELSE NULL END) FROM src2 GROUP BY src2.some_uniqueidentifier , src2.some_varchar , src2.[key] ORDER BY src2.some_uniqueidentifier , src2.some_varchar , src2.[key];
無論輸入表中包含多少行,查詢計劃都使用幾個嵌套循環連接。想必這是使用
CROSS APPLY
運算符的神器。為了您的樂趣,我已經設置了一個DB Fiddle。有沒有更有效的方法將數據從 json 格式轉換為“真實”的列集?
我使用我的本地 SQL Server 2019 實例創建了上面的程式碼,但是目標將是 Azure SQL 數據庫,因此所有最新最好的選項都可用。
WITH
您應該使用該子句直接從 json 數組中提取值,如下所示:SELECT src.* , tt.* FROM json_test src CROSS APPLY OPENJSON(src.the_json , 'lax $') WITH ( [name] sysname '$.name' , [object_id] int '$.object_id' , [principal_id] int '$.principal_id' , [schema_id] smallint '$.schema_id' , [parent_object_id] int '$.parent_object_id' , [type] char(2) '$.type' , [type_desc] nvarchar(60) '$.type_desc' , [create_date] datetime '$.create_date' , [modify_date] datetime '$.modify_date' , [is_ms_shipped] bit '$.is_ms_shipped' , [is_published] bit '$.is_published' , [is_schema_published] bit '$.is_schema_published' , [lob_data_space_id] int '$.lob_data_space_id' , [filestream_data_space_id] int '$.filestream_data_space_id' , [max_column_id_used] int '$.max_column_id_used' , [lock_on_bulk_load] bit '$.lock_on_bulk_load' , [uses_ansi_nulls] bit '$.uses_ansi_nulls' , [is_replicated] bit '$.is_replicated' , [has_replication_filter] bit '$.has_replication_filter' , [is_merge_published] bit '$.is_merge_published' , [is_sync_tran_subscribed] bit '$.is_sync_tran_subscribed' , [has_unchecked_assembly_data] bit '$.has_unchecked_assembly_data' , [text_in_row_limit] int '$.text_in_row_limit' , [large_value_types_out_of_row] bit '$.large_value_types_out_of_row' , [is_tracked_by_cdc] bit '$.is_tracked_by_cdc' , [lock_escalation] tinyint '$.lock_escalation' , [lock_escalation_desc] nvarchar(60) '$.lock_escalation_desc' , [is_filetable] bit '$.is_filetable' , [is_memory_optimized] bit '$.is_memory_optimized' , [durability] tinyint '$.durability' , [durability_desc] nvarchar(60) '$.durability_desc' , [temporal_type] tinyint '$.temporal_type' , [temporal_type_desc] nvarchar(60) '$.temporal_type_desc' , [history_table_id] int '$.history_table_id' , [is_remote_data_archive_enabled] bit '$.is_remote_data_archive_enabled' , [is_external] bit '$.is_external' , [history_retention_period] int '$.history_retention_period' , [history_retention_period_unit] int '$.history_retention_period_unit' , [history_retention_period_unit_desc] nvarchar(10) '$.history_retention_period_unit_desc' , [is_node] bit '$.is_node' , [is_edge] bit '$.is_edge' ) AS tt
這個 Fiddle比較了兩種方法的輸出,證明結果是相同的。
輸出中唯一缺少的列是表值函式
key
生成的值,該OPENJSON
值僅在未WITH
指定子句時返回。我的變體計劃只有一個嵌套循環,而且似乎效率更高。