Sql-Server

SQL 查詢從 1 秒減慢到 11 分鐘 - 為什麼?

  • May 30, 2021

問題:我將以下查詢(按外鍵依賴項列出表)移植到 PostGreSql。

WITH Fkeys AS (

   SELECT DISTINCT 
        OnTable       = OnTable.name
       ,AgainstTable  = AgainstTable.name 
   FROM sysforeignkeys fk 

       INNER JOIN sysobjects onTable 
           ON fk.fkeyid = onTable.id 

       INNER JOIN sysobjects againstTable  
           ON fk.rkeyid = againstTable.id 

   WHERE 1=1
       AND AgainstTable.TYPE = 'U'
       AND OnTable.TYPE = 'U'
       -- ignore self joins; they cause an infinite recursion
       AND OnTable.Name <> AgainstTable.Name
   )

,MyData AS (

   SELECT 
        OnTable = o.name 
       ,AgainstTable = FKeys.againstTable 
   FROM sys.objects o 

   LEFT JOIN FKeys
       ON o.name = FKeys.onTable 

   WHERE (1=1) 
       AND o.type = 'U' 
       AND o.name NOT LIKE 'sys%' 
   )

,MyRecursion AS (

   -- base case
   SELECT  
        TableName    = OnTable
       ,Lvl        = 1
   FROM MyData
   WHERE 1=1
       AND AgainstTable IS NULL 

   -- recursive case
   UNION ALL 

   SELECT 
        TableName = OnTable 
       ,Lvl       = r.Lvl + 1 
   FROM MyData d 
       INNER JOIN MyRecursion r 
           ON d.AgainstTable = r.TableName 
)
SELECT 
    Lvl = MAX(Lvl)
   ,TableName
   --,strSql = 'delete from [' + tablename + ']'
FROM 
   MyRecursion
GROUP BY
   TableName

ORDER BY lvl

/*
ORDER BY 

    2 ASC
   ,1 ASC

*/

使用 information_schema,查詢如下所示:

WITH Fkeys AS 
(
   SELECT DISTINCT 
        KCU1.TABLE_NAME AS OnTable 
       ,KCU2.TABLE_NAME AS AgainstTable 
   FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

   LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
       ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
       AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
       AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

   LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
       ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
       AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
       AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
       AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 

   WHERE (1=1)
   AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME 
)

,MyData AS 
( 
   SELECT 
        TABLE_NAME AS OnTable  
       ,FKeys.againstTable AS AgainstTable
   FROM INFORMATION_SCHEMA.TABLES 

   LEFT JOIN FKeys
       ON TABLE_NAME = FKeys.onTable  

   WHERE (1=1) 
       AND TABLE_TYPE = 'BASE TABLE'
       AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
)

,MyRecursion AS 
(
   -- base case
   SELECT  
        OnTable AS TableName 
       ,1 AS Lvl 
   FROM MyData
   WHERE 1=1
   AND AgainstTable IS NULL 

   -- recursive case
   UNION ALL 

   SELECT 
        OnTable AS TableName
       ,r.Lvl + 1 AS Lvl 
   FROM MyData d 

   INNER JOIN MyRecursion r 
       ON d.AgainstTable = r.TableName 
)

SELECT 
    MAX(Lvl) AS Lvl 
   ,TableName
   --,strSql = 'delete from [' + tablename + ']'
FROM 
   MyRecursion
GROUP BY
   TableName

ORDER BY lvl

/*
ORDER BY 

    2 ASC
   ,1 ASC

*/

我現在的問題是:

在 SQL Server 中(在 2008 R2 上測試):為什麼我替換時查詢從 1 秒跳到 11 分鐘

SELECT DISTINCT 
    OnTable       = OnTable.name
   ,AgainstTable  = AgainstTable.name 
FROM sysforeignkeys fk 

   INNER JOIN sysobjects onTable 
       ON fk.fkeyid = onTable.id 

   INNER JOIN sysobjects againstTable  
       ON fk.rkeyid = againstTable.id 

WHERE 1=1
   AND AgainstTable.TYPE = 'U'
   AND OnTable.TYPE = 'U'
   -- ignore self joins; they cause an infinite recursion
   AND OnTable.Name <> AgainstTable.Name

SELECT DISTINCT 
    KCU1.TABLE_NAME AS OnTable 
   ,KCU2.TABLE_NAME AS AgainstTable 
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
   ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
   AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
   AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
   ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
   AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
   AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
   AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 

WHERE (1=1)
AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME 

???

據我所知,單獨執行部分查詢時確實沒有顯著的速度差異。結果集也完全相同(我檢查了 Excel 中的每一行),儘管順序不同。

低於工作 PostGreSQL 版本(在完全相同的數據庫內容上在 35 毫秒內完成

$$ 75 tables $$…)

– 沒有任何保證 –

WITH RECURSIVE Fkeys AS 
(
   SELECT DISTINCT 
        KCU1.TABLE_NAME AS OnTable 
       ,KCU2.TABLE_NAME AS AgainstTable 
   FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

   LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
       ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
       AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
       AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

   LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
       ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
       AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
       AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
       AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 
)

,MyData AS 
( 
   SELECT 
        TABLE_NAME AS OnTable  
       ,FKeys.againstTable AS AgainstTable
   FROM INFORMATION_SCHEMA.TABLES 

   LEFT JOIN FKeys
       ON TABLE_NAME = FKeys.onTable  

   WHERE (1=1) 
       AND TABLE_TYPE = 'BASE TABLE'
       AND TABLE_SCHEMA = 'public'
       --AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
)


,MyRecursion AS 
(
   -- base case
   SELECT  
        OnTable AS TableName 
       ,1 AS Lvl 
   FROM MyData
   WHERE 1=1
   AND AgainstTable IS NULL 

   -- recursive case
   UNION ALL 

   SELECT 
        OnTable AS TableName
       ,r.Lvl + 1 AS Lvl 
   FROM MyData d 

   INNER JOIN MyRecursion r 
       ON d.AgainstTable = r.TableName 
)

SELECT 
    MAX(Lvl) AS Lvl 
   ,TableName
   --,strSql = 'delete from [' + tablename + ']'
FROM 
   MyRecursion
GROUP BY
   TableName

ORDER BY lvl


/*
ORDER BY 

    2 ASC
   ,1 ASC

*/

似乎也

AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME

在使用 information_schema 時是多餘的,所以它實際上應該更快。

我可能會放棄INFORMATION_SCHEMA這裡的視圖並使用新sys.視圖(而不是向後兼容的視圖),或者至少首先將結果實現JOIN到索引表中。

遞歸 CTE 在 SQL Server 中始終獲得相同的基本計劃,其中每一行都被添加到堆棧假離線並一一處理。這意味著之間的連接REFERENTIAL_CONSTRAINTS RC, KEY_COLUMN_USAGE KCU1, KEY_COLUMN_USAGE KCU2將與以下查詢的結果一樣多次SELECT COUNT(*) FROM MyRecursion

我假設在您的情況下(從 11 分鐘的執行時間開始)可能是數千次,因此您需要遞歸部分盡可能高效。您的查詢將執行數千次以下類型的事情。

  SELECT  
          KCU1.TABLE_CATALOG,
          KCU1.TABLE_SCHEMA,
          KCU1.TABLE_NAME
   FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 
   INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
       ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
       AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
       AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 
   INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
       ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
       AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
       AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
       AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 
   WHERE KCU2.TABLE_NAME = 'FOO' 

(旁注:如果不同模式中的表名相同,則查詢的兩個版本都將返回不正確的結果)

正如你所看到的,這個計劃非常可怕。

計劃

將此與您的sys查詢計劃進行比較,這有點簡單。

SELECT OnTable = OnTable.name, 
      AgainstTable = AgainstTable.name 
FROM   sysforeignkeys fk 
      INNER JOIN sysobjects OnTable 
        ON fk.fkeyid = OnTable.id 
      INNER JOIN sysobjects AgainstTable 
        ON fk.rkeyid = AgainstTable.id 
WHERE  AgainstTable.name = 'FOO' 

計劃 2

您可以通過更改 to 的定義來鼓勵中間物化,而無需顯式創建#tempMyData

MyData AS 
( 
   SELECT TOP 99.999999 PERCENT
        TABLE_NAME AS OnTable  
       ,Fkeys.AgainstTable AS AgainstTable
   FROM INFORMATION_SCHEMA.TABLES 

   LEFT JOIN Fkeys
       ON TABLE_NAME = Fkeys.OnTable  

   WHERE (1=1) 
       AND TABLE_TYPE = 'BASE TABLE'
       AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
       ORDER BY TABLE_NAME
)

在我的機器上進行測試,Adventureworks2008這使執行時間從大約 10 秒下降到 250 毫秒(在第一次執行之後因為計劃需要 2 秒來編譯)。它在計劃中添加了一個急切的假離線,在第一個遞歸呼叫上實現 Join 的結果,然後在後續呼叫中重放它。但是,不能保證此行為,您可能希望支持 Connect 項目請求提供提示以強制 CTE 或派生表的中間實現

我會感到更安全#temp,如下所示明確創建表格,而不是依賴這種行為。

CREATE TABLE #MyData
(
OnTable SYSNAME,
AgainstTable NVARCHAR(128) NULL,
UNIQUE CLUSTERED (AgainstTable, OnTable)
);

WITH Fkeys AS 
(
   SELECT DISTINCT 
        KCU1.TABLE_NAME AS OnTable 
       ,KCU2.TABLE_NAME AS AgainstTable 
   FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

   LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
       ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
       AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
       AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

   LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
       ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
       AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
       AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
       AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 

   WHERE (1=1)
   AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME 
)

,MyData AS 
( 
   SELECT 
        TABLE_NAME AS OnTable  
       ,Fkeys.AgainstTable AS AgainstTable
   FROM INFORMATION_SCHEMA.TABLES 

   LEFT JOIN Fkeys
       ON TABLE_NAME = Fkeys.OnTable  

   WHERE (1=1) 
       AND TABLE_TYPE = 'BASE TABLE'
       AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
)
INSERT INTO #MyData
SELECT *
FROM MyData;


WITH MyRecursion AS 
(
   -- base case
   SELECT  
        OnTable AS TableName 
       ,1 AS Lvl 
   FROM #MyData
   WHERE 1=1
   AND AgainstTable IS NULL 

   -- recursive case
   UNION ALL 

   SELECT 
        OnTable AS TableName
       ,r.Lvl + 1 AS Lvl 
   FROM #MyData d 

   INNER JOIN MyRecursion r 
       ON d.AgainstTable = r.TableName 
)

SELECT 
    MAX(Lvl) AS Lvl 
   ,TableName
   --,strSql = 'delete from [' + tablename + ']'
FROM 
   MyRecursion
GROUP BY
   TableName

ORDER BY Lvl

DROP TABLE #MyData

或者

引用自:https://dba.stackexchange.com/questions/15596