Sql-Server

通過 OPENJSON 優化 json 數據的提取

  • September 24, 2021

我正在嘗試優化從 REST API 獲得的值的提取,該 API 在數組中返回 json 值。

這是一個最小的、完整的、可驗證的例子,它準確地反映了我正在做的事情。

USE tempdb;

DROP TABLE IF EXISTS dbo.json_test;

CREATE TABLE dbo.json_test
(
   json_test_id                int                 NOT NULL
       IDENTITY(1,1)
   , some_uniqueidentifier     uniqueidentifier    NULL
   , some_varchar              varchar(100)        NULL
   , the_json                  nvarchar(max)       NULL
);

INSERT INTO dbo.json_test (some_uniqueidentifier, some_varchar, the_json)
SELECT 
       some_uniqueidentifier       = NEWID()
     , some_varchar                = CONVERT(varchar(100), CRYPT_GEN_RANDOM(64), 1)
     , the_json = (
           SELECT st.* 
           FROM sys.tables st
               CROSS JOIN sys.tables st2
           WHERE st.object_id = t.object_id FOR JSON AUTO
           )
FROM sys.tables t;

;WITH src AS 
(
   SELECT jt.some_uniqueidentifier
       , jt.some_varchar
       , top_array.[key]
       , top_array.[value]
   FROM dbo.json_test jt
       CROSS APPLY OPENJSON(jt.the_json, N'lax $') top_array
),
src2 AS
(
   SELECT src.some_uniqueidentifier
       , src.some_varchar
       , src.[key]
       , src.[value]
       , inner_key = inner_array.[key]
       , inner_value = inner_array.[value]
   FROM src
       CROSS APPLY OPENJSON(src.[value], N'lax $') inner_array
)
SELECT src2.some_uniqueidentifier
   , src2.some_varchar
   , src2.[key]
   , [name]                                = MAX(CASE WHEN src2.[inner_key] = 'name'                               THEN src2.[inner_value] ELSE NULL END)
   , [object_id]                           = MAX(CASE WHEN src2.[inner_key] = 'object_id'                          THEN src2.[inner_value] ELSE NULL END)
   , [principal_id]                        = MAX(CASE WHEN src2.[inner_key] = 'principal_id'                       THEN src2.[inner_value] ELSE NULL END)
   , [schema_id]                           = MAX(CASE WHEN src2.[inner_key] = 'schema_id'                          THEN src2.[inner_value] ELSE NULL END)
   , [parent_object_id]                    = MAX(CASE WHEN src2.[inner_key] = 'parent_object_id'                   THEN src2.[inner_value] ELSE NULL END)
   , [type]                                = MAX(CASE WHEN src2.[inner_key] = 'type'                               THEN src2.[inner_value] ELSE NULL END)
   , [type_desc]                           = MAX(CASE WHEN src2.[inner_key] = 'type_desc'                          THEN src2.[inner_value] ELSE NULL END)
   , [create_date]                         = MAX(CASE WHEN src2.[inner_key] = 'create_date'                        THEN src2.[inner_value] ELSE NULL END)
   , [modify_date]                         = MAX(CASE WHEN src2.[inner_key] = 'modify_date'                        THEN src2.[inner_value] ELSE NULL END)
   , [is_ms_shipped]                       = MAX(CASE WHEN src2.[inner_key] = 'is_ms_shipped'                      THEN src2.[inner_value] ELSE NULL END)
   , [is_published]                        = MAX(CASE WHEN src2.[inner_key] = 'is_published'                       THEN src2.[inner_value] ELSE NULL END)
   , [is_schema_published]                 = MAX(CASE WHEN src2.[inner_key] = 'is_schema_published'                THEN src2.[inner_value] ELSE NULL END)
   , [lob_data_space_id]                   = MAX(CASE WHEN src2.[inner_key] = 'lob_data_space_id'                  THEN src2.[inner_value] ELSE NULL END)
   , [filestream_data_space_id]            = MAX(CASE WHEN src2.[inner_key] = 'filestream_data_space_id'           THEN src2.[inner_value] ELSE NULL END)
   , [max_column_id_used]                  = MAX(CASE WHEN src2.[inner_key] = 'max_column_id_used'                 THEN src2.[inner_value] ELSE NULL END)
   , [lock_on_bulk_load]                   = MAX(CASE WHEN src2.[inner_key] = 'lock_on_bulk_load'                  THEN src2.[inner_value] ELSE NULL END)
   , [uses_ansi_nulls]                     = MAX(CASE WHEN src2.[inner_key] = 'uses_ansi_nulls'                    THEN src2.[inner_value] ELSE NULL END)
   , [is_replicated]                       = MAX(CASE WHEN src2.[inner_key] = 'is_replicated'                      THEN src2.[inner_value] ELSE NULL END)
   , [has_replication_filter]              = MAX(CASE WHEN src2.[inner_key] = 'has_replication_filter'             THEN src2.[inner_value] ELSE NULL END)
   , [is_merge_published]                  = MAX(CASE WHEN src2.[inner_key] = 'is_merge_published'                 THEN src2.[inner_value] ELSE NULL END)
   , [is_sync_tran_subscribed]             = MAX(CASE WHEN src2.[inner_key] = 'is_sync_tran_subscribed'            THEN src2.[inner_value] ELSE NULL END)
   , [has_unchecked_assembly_data]         = MAX(CASE WHEN src2.[inner_key] = 'has_unchecked_assembly_data'        THEN src2.[inner_value] ELSE NULL END)
   , [text_in_row_limit]                   = MAX(CASE WHEN src2.[inner_key] = 'text_in_row_limit'                  THEN src2.[inner_value] ELSE NULL END)
   , [large_value_types_out_of_row]        = MAX(CASE WHEN src2.[inner_key] = 'large_value_types_out_of_row'       THEN src2.[inner_value] ELSE NULL END)
   , [is_tracked_by_cdc]                   = MAX(CASE WHEN src2.[inner_key] = 'is_tracked_by_cdc'                  THEN src2.[inner_value] ELSE NULL END)
   , [lock_escalation]                     = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation'                    THEN src2.[inner_value] ELSE NULL END)
   , [lock_escalation_desc]                = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation_desc'               THEN src2.[inner_value] ELSE NULL END)
   , [is_filetable]                        = MAX(CASE WHEN src2.[inner_key] = 'is_filetable'                       THEN src2.[inner_value] ELSE NULL END)
   , [is_memory_optimized]                 = MAX(CASE WHEN src2.[inner_key] = 'is_memory_optimized'                THEN src2.[inner_value] ELSE NULL END)
   , [durability]                          = MAX(CASE WHEN src2.[inner_key] = 'durability'                         THEN src2.[inner_value] ELSE NULL END)
   , [durability_desc]                     = MAX(CASE WHEN src2.[inner_key] = 'durability_desc'                    THEN src2.[inner_value] ELSE NULL END)
   , [temporal_type]                       = MAX(CASE WHEN src2.[inner_key] = 'temporal_type'                      THEN src2.[inner_value] ELSE NULL END)
   , [temporal_type_desc]                  = MAX(CASE WHEN src2.[inner_key] = 'temporal_type_desc'                 THEN src2.[inner_value] ELSE NULL END)
   , [history_table_id]                    = MAX(CASE WHEN src2.[inner_key] = 'history_table_id'                   THEN src2.[inner_value] ELSE NULL END)
   , [is_remote_data_archive_enabled]      = MAX(CASE WHEN src2.[inner_key] = 'is_remote_data_archive_enabled'     THEN src2.[inner_value] ELSE NULL END)
   , [is_external]                         = MAX(CASE WHEN src2.[inner_key] = 'is_external'                        THEN src2.[inner_value] ELSE NULL END)
   , [history_retention_period]            = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period'           THEN src2.[inner_value] ELSE NULL END)
   , [history_retention_period_unit]       = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit'      THEN src2.[inner_value] ELSE NULL END)
   , [history_retention_period_unit_desc]  = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit_desc' THEN src2.[inner_value] ELSE NULL END)
   , [is_node]                             = MAX(CASE WHEN src2.[inner_key] = 'is_node'                            THEN src2.[inner_value] ELSE NULL END)
   , [is_edge]                             = MAX(CASE WHEN src2.[inner_key] = 'is_edge'                            THEN src2.[inner_value] ELSE NULL END)
FROM src2
GROUP BY src2.some_uniqueidentifier
   , src2.some_varchar
   , src2.[key]
ORDER BY src2.some_uniqueidentifier
   , src2.some_varchar
   , src2.[key];

無論輸入表中包含多少行,查詢計劃都使用幾個嵌套循環連接想必這是使用CROSS APPLY運算符的神器。為了您的樂趣,我已經設置了一個DB Fiddle

有沒有更有效的方法將數據從 json 格式轉換為“真實”的列集?

我使用我的本地 SQL Server 2019 實例創建了上面的程式碼,但是目標將是 Azure SQL 數據庫,因此所有最新最好的選項都可用。

WITH您應該使用該子句直接從 json 數組中提取值,如下所示:

SELECT src.*
   , tt.*
FROM json_test src
CROSS APPLY OPENJSON(src.the_json , 'lax $')
   WITH (
         [name]                               sysname       '$.name'
       , [object_id]                          int           '$.object_id'
       , [principal_id]                       int           '$.principal_id'
       , [schema_id]                          smallint      '$.schema_id'
       , [parent_object_id]                   int           '$.parent_object_id'
       , [type]                               char(2)       '$.type'
       , [type_desc]                          nvarchar(60)  '$.type_desc'
       , [create_date]                        datetime      '$.create_date'
       , [modify_date]                        datetime      '$.modify_date'
       , [is_ms_shipped]                      bit           '$.is_ms_shipped'
       , [is_published]                       bit           '$.is_published'
       , [is_schema_published]                bit           '$.is_schema_published'
       , [lob_data_space_id]                  int           '$.lob_data_space_id'
       , [filestream_data_space_id]           int           '$.filestream_data_space_id'
       , [max_column_id_used]                 int           '$.max_column_id_used'
       , [lock_on_bulk_load]                  bit           '$.lock_on_bulk_load'
       , [uses_ansi_nulls]                    bit           '$.uses_ansi_nulls'
       , [is_replicated]                      bit           '$.is_replicated'
       , [has_replication_filter]             bit           '$.has_replication_filter'
       , [is_merge_published]                 bit           '$.is_merge_published'
       , [is_sync_tran_subscribed]            bit           '$.is_sync_tran_subscribed'
       , [has_unchecked_assembly_data]        bit           '$.has_unchecked_assembly_data'
       , [text_in_row_limit]                  int           '$.text_in_row_limit'
       , [large_value_types_out_of_row]       bit           '$.large_value_types_out_of_row'
       , [is_tracked_by_cdc]                  bit           '$.is_tracked_by_cdc'
       , [lock_escalation]                    tinyint       '$.lock_escalation'
       , [lock_escalation_desc]               nvarchar(60)  '$.lock_escalation_desc'
       , [is_filetable]                       bit           '$.is_filetable'
       , [is_memory_optimized]                bit           '$.is_memory_optimized'
       , [durability]                         tinyint       '$.durability'
       , [durability_desc]                    nvarchar(60)  '$.durability_desc'
       , [temporal_type]                      tinyint       '$.temporal_type'
       , [temporal_type_desc]                 nvarchar(60)  '$.temporal_type_desc'
       , [history_table_id]                   int           '$.history_table_id'
       , [is_remote_data_archive_enabled]     bit           '$.is_remote_data_archive_enabled'
       , [is_external]                        bit           '$.is_external'
       , [history_retention_period]           int           '$.history_retention_period'
       , [history_retention_period_unit]      int           '$.history_retention_period_unit'
       , [history_retention_period_unit_desc] nvarchar(10)  '$.history_retention_period_unit_desc'
       , [is_node]                            bit           '$.is_node'
       , [is_edge]                            bit           '$.is_edge'
   ) AS tt

這個 Fiddle比較了兩種方法的輸出,證明結果是相同的。

輸出中唯一缺少的列是表值函式key生成的值,該OPENJSON值僅在未WITH指定子句時返回。我的變體計劃只有一個嵌套循環,而且似乎效率更高。

在此處輸入圖像描述

OPENJSON CROSS APPLY 的 Microsoft Docs 範例

引用自:https://dba.stackexchange.com/questions/295787