Sql-Server

檢查分區函式中是否存在值列表中的值,如果不存在,則遍歷列表並將新的分區值添加到分區函式?

  • February 16, 2020

我有一個帶有分區表的 azure sql db,該分區表在 varchar 數據類型上具有分區函式。

在 Etl 過程中,我將獲得一個分區值列表作為我需要的參數:

  • 檢查分區函式是否存在分區值,如果不存在則遍歷值列表並添加到分區函式。
  • 對於作為參數提供的分區值列表,遍歷 $partition 函式以將 partitionids 作為逗號分隔列表獲取到新變數中
  • 使用以下命令截斷分區表:truncate parttable with partitions (@newvariable)

到目前為止,我在程式碼中得到的是:

-- Table, Partition Function, Partition Scheme

-- RANGE RIGHT partition function on a varchar(20) value with boundaries 'A'..'E'.
CREATE PARTITION FUNCTION PF_TEXT (varchar(20))
    AS RANGE RIGHT
    FOR VALUES ('A', 'B', 'C', 'D', 'E');

-- Map every partition of PF_TEXT to the PRIMARY filegroup.
CREATE PARTITION SCHEME PS_TEXT
    AS PARTITION PF_TEXT
    ALL TO ([PRIMARY]);

-- Fact table partitioned on the [TEXT] column through PS_TEXT.
-- Commas separate column definitions (they do not precede NOT NULL), and the
-- length goes outside the bracket-quoted type name: [varchar](20).
CREATE TABLE [dbo].[fact_partitioned] (
    [Col1] [int] NOT NULL,
    [Col2] [int] NOT NULL,
    [TEXT] [varchar](20) NOT NULL
) ON PS_TEXT ([TEXT]);



---------------------------

DECLARE @cmd nvarchar(max);
DECLARE @partitions nvarchar(max);
DECLARE @partitionids nvarchar(max);

-- Partition values supplied by the ETL process, held as one comma-separated string
-- (a scalar variable cannot be assigned a list of separate literals).
SET @partitions = N'A,B,C';

-- TODO: Code to check Partition Function for @partitions values exist?
--       For @partitions values that don't exist, iterate @partitions values
--       and add to partition function.

-- TODO: code to iterate @partitions values over $PARTITION.PF_TEXT(<value>)
--       and add each result to @partitionids as a comma-separated list.

-- NOTE: while @partitionids is still NULL the concatenation below yields NULL,
-- so nothing is printed until the step above is implemented.
SET @cmd = N'TRUNCATE TABLE dbo.fact_partitioned WITH (PARTITIONS (' + @partitionids + N'));';

PRINT @cmd;

--EXEC sys.sp_executesql @cmd;

SQL Server 表分區本質上是範圍分區。儘管您可以實現列表分區(即分區列的值與邊界完全匹配),但請務必確保邊界精確匹配,以避免在 SPLIT 操作過程中產生過多的數據移動和日誌記錄。請考慮在事實表上創建一個與邊界值匹配的 CHECK 約束(例如 TEXT IN('A','B','C', …))。

以下是基於 DDL 和您問題中的範例數據的範例腳本,它假設(至少)事實表上存在聚集索引。我添加了一些額外的邊界來說明可以在 ETL 過程中引入現有邊界之間的新邊界,無需移動數據,假設分區列值與邊界完全匹配。有關每個步驟的詳細資訊,請參閱內聯註釋。

在我的測試中,從 staging 轉移到事實表需要幾秒鐘。請務必在之後更新事實表統計資訊。

-- RANGE RIGHT partition function over varchar(20) with seven boundary values.
CREATE PARTITION FUNCTION PF_TEXT (varchar(20))
AS RANGE RIGHT FOR VALUES ('A', 'B', 'C', 'D', 'E', 'M', 'S');

-- Every partition of PF_TEXT is mapped to the PRIMARY filegroup.
CREATE PARTITION SCHEME PS_TEXT
AS PARTITION PF_TEXT ALL TO ([PRIMARY]);

-- Fact table and its clustered index cdx are both placed on PS_TEXT, keyed by TEXT.
CREATE TABLE dbo.fact_partitioned
(
    Col1 int         NOT NULL,
    Col2 int         NOT NULL,
    TEXT varchar(20) NOT NULL INDEX cdx CLUSTERED ON PS_TEXT (TEXT)
)
ON PS_TEXT (TEXT);

--seed the fact table with one million (1, 1, <boundary>) rows per boundary value
WITH
    ten AS (SELECT v.n FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS v(n))
   ,thousand AS (SELECT 0 AS n FROM ten AS x CROSS JOIN ten AS y CROSS JOIN ten AS z)
   ,numbered AS (
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num
        FROM thousand AS x
        CROSS JOIN thousand AS y
        CROSS JOIN thousand AS z
    )
    --keep only the first million generated row numbers
   ,million AS (SELECT num FROM numbered WHERE num <= 1000000)
INSERT INTO dbo.fact_partitioned WITH (TABLOCKX) (Col1, Col2, TEXT)
SELECT
    1,
    1,
    boundary.txt
FROM million
--one copy of the million rows for each listed TEXT value
CROSS JOIN (VALUES ('A'), ('B'), ('C'), ('D'), ('E'), ('M'), ('S')) AS boundary(txt);
GO

--example ETL objects and data
--remove any staging table left over from an earlier run
IF OBJECT_ID(N'dbo.fact_partitioned_staging', N'U') IS NOT NULL
    DROP TABLE dbo.fact_partitioned_staging;

--staging table must have same indexes as target (aligned)
CREATE TABLE dbo.fact_partitioned_staging
(
    Col1 int         NOT NULL,
    Col2 int         NOT NULL,
    TEXT varchar(20) NOT NULL INDEX cdx CLUSTERED ON PS_TEXT (TEXT)
)
ON PS_TEXT (TEXT);

--seed the staging table with one million (2, 2, <value>) rows per incremental TEXT value:
--  'C' replaces an existing partition
--  'G' is a new partition BETWEEN 'E' and 'M'
--  'X' is a new partition AFTER 'S'
WITH
    ten AS (SELECT v.n FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS v(n))
   ,thousand AS (SELECT 0 AS n FROM ten AS x CROSS JOIN ten AS y CROSS JOIN ten AS z)
   ,numbered AS (
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num
        FROM thousand AS x
        CROSS JOIN thousand AS y
        CROSS JOIN thousand AS z
    )
    --keep only the first million generated row numbers
   ,million AS (SELECT num FROM numbered WHERE num <= 1000000)
INSERT INTO dbo.fact_partitioned_staging WITH (TABLOCKX) (Col1, Col2, TEXT)
SELECT
    2,
    2,
    incoming.txt
FROM million
CROSS JOIN (VALUES ('C'), ('G'), ('X')) AS incoming(txt);
GO

--dynamic list partitioning load
--
--Replaces, in dbo.fact_partitioned, every partition whose TEXT value appears in
--dbo.fact_partitioned_staging, adding new boundaries to PF_TEXT where needed.
--Steps (all inside one transaction):
--  1. collect the distinct TEXT values present in staging
--  2. (re)create PF_TEXT_STAGING/PS_TEXT_STAGING with existing + new boundaries
--  3. rebuild the staging clustered index onto PS_TEXT_STAGING
--  4. SPLIT PF_TEXT for each boundary it does not have yet
--  5. TRUNCATE the target partitions being reloaded
--  6. SWITCH the matching staging partitions into the target
--  7. assert that staging is empty afterwards
--Each dynamic statement is PRINTed before it is executed.
SET XACT_ABORT ON; --best practice with explicit transactions
DECLARE @SQL nvarchar(MAX);
BEGIN TRY

   BEGIN TRAN;

   --get distinct incremental load TEXT values from staging
   DECLARE @incremental_load_boundaries TABLE(
         incremental_load_boundary varchar(20) NOT NULL PRIMARY KEY
       );
   INSERT INTO @incremental_load_boundaries(incremental_load_boundary)
       SELECT DISTINCT TEXT
       FROM dbo.fact_partitioned_staging;

   --recreate new partition function/scheme with existing partition boundaries plus new distinct TEXT values
   IF EXISTS(SELECT 1 FROM sys.partition_schemes WHERE name = N'PS_TEXT_STAGING')
       DROP PARTITION SCHEME PS_TEXT_STAGING;
   IF EXISTS(SELECT 1 FROM sys.partition_functions WHERE name = N'PF_TEXT_STAGING')
       DROP PARTITION FUNCTION PF_TEXT_STAGING;
   --the nvarchar(MAX) cast makes STRING_AGG return nvarchar(MAX); with non-MAX input
   --it fails once the aggregated string exceeds 8000 bytes
   SELECT @SQL =
       N'CREATE PARTITION FUNCTION PF_TEXT_STAGING(varchar(20)) AS RANGE RIGHT FOR VALUES ('
       + STRING_AGG(CAST(QUOTENAME(boundary, '''') AS nvarchar(MAX)), N',')
       + N');'
   FROM (
       --existing target table boundaries
       SELECT CAST(value AS varchar(20)) AS boundary
       FROM sys.partition_range_values
       WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT')
       UNION --UNION (not UNION ALL): a boundary value may appear only once in a partition function
       --staging table boundaries
       SELECT incremental_load_boundary
       FROM @incremental_load_boundaries
       ) AS boundaries;
   PRINT @SQL;
   EXECUTE sp_executesql @SQL;
   CREATE PARTITION SCHEME PS_TEXT_STAGING AS PARTITION PF_TEXT_STAGING ALL TO ([PRIMARY]);

   --partition staging table using new partition scheme with end state boundaries
   CREATE CLUSTERED INDEX cdx ON dbo.fact_partitioned_staging(TEXT)
       WITH(DROP_EXISTING=ON)
       ON PS_TEXT_STAGING(TEXT);

   --SPLIT existing partition function to add new boundaries
   --no data movement needed when existing partition column values exactly match existing boundaries
   SELECT @SQL = STRING_AGG(
       CAST(
           N'ALTER PARTITION SCHEME PS_TEXT NEXT USED [PRIMARY];ALTER PARTITION FUNCTION PF_TEXT() SPLIT RANGE('
           + QUOTENAME(new_boundaries.boundary, '''')
           + N')'
           AS nvarchar(MAX))
       , N';')
   FROM (
       --all boundaries (end state, taken from the staging partition function)
       SELECT CAST(value AS varchar(20)) AS boundary
       FROM sys.partition_range_values
       WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT_STAGING')
       EXCEPT
       --existing target table boundaries
       SELECT CAST(value AS varchar(20)) AS boundary
       FROM sys.partition_range_values
       WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT')
       ) AS new_boundaries;
   --@SQL is NULL when the load introduces no new boundary; nothing to split then
   IF @SQL IS NOT NULL
   BEGIN
       PRINT @SQL;
       EXECUTE sp_executesql @SQL;
   END;

   --truncate partitions to be reloaded
   --(partition numbers are computed after the SPLIT above, so they reflect the new boundaries)
   SELECT @SQL =
       N'TRUNCATE TABLE dbo.fact_partitioned WITH(PARTITIONS('
       + STRING_AGG(CAST(partition_number AS nvarchar(MAX)), N',')
       + N'));'
   FROM (
       SELECT DISTINCT $PARTITION.PF_TEXT(incremental_load_boundary) AS partition_number
       FROM @incremental_load_boundaries
       ) AS existing_boundaries;
   --@SQL is NULL when staging holds no rows; nothing to truncate then
   IF @SQL IS NOT NULL
   BEGIN
       PRINT @SQL;
       EXECUTE sp_executesql @SQL;
   END;

   --switch in incremental load partitions
   SELECT @SQL = STRING_AGG(
       CAST(
           N'ALTER TABLE dbo.fact_partitioned_staging SWITCH PARTITION '
           + CAST(partition_number AS nvarchar(10))
           + N' TO dbo.fact_partitioned PARTITION '
           + CAST(partition_number AS nvarchar(10))
           AS nvarchar(MAX))
       , N';')
       + N';'
   FROM (
       SELECT DISTINCT $PARTITION.PF_TEXT(incremental_load_boundary) AS partition_number
       FROM @incremental_load_boundaries
       ) AS existing_boundaries;
   --@SQL is NULL when staging holds no rows; nothing to switch then
   IF @SQL IS NOT NULL
   BEGIN
       PRINT @SQL;
       EXECUTE sp_executesql @SQL;
   END;

   --assert staging table is now empty
   IF EXISTS(SELECT 1 FROM dbo.fact_partitioned_staging)
       RAISERROR('Assertion failed: staging table is not empty',16,1);

   COMMIT;

END TRY
BEGIN CATCH
   --@@TRANCOUNT (not @@ROWCOUNT) tells whether a transaction is still open
   IF @@TRANCOUNT > 0 ROLLBACK;
   --re-raise the original error to the caller
   THROW;
END CATCH;
GO

引用自:https://dba.stackexchange.com/questions/259786