Sql-Server
如何檢查分區函式中是否已存在值列表中的各個值,如果不存在,則遍歷列表並將新的分區值添加到分區函式?
我有一個帶有分區表的 azure sql db,該分區表在 varchar 數據類型上具有分區函式。
在 ETL 過程中,我會收到一個分區值列表作為參數,並且需要:
- 檢查分區函式是否存在分區值,如果不存在則遍歷值列表並添加到分區函式。
- 對於作為參數提供的分區值列表,逐一呼叫 $PARTITION 函式,並將得到的分區編號(partition ids)以逗號分隔列表的形式存入新變數
- 使用以下命令截斷分區表:truncate parttable with partitions (@newvariable)
到目前為止,我在程式碼中得到的是:
-- Question skeleton: a fact table partitioned on a varchar column, followed by
-- the start of a script that should truncate the partitions matching a list of
-- partition values. The lookup/split steps are still TODO placeholders.

-- Table, Partition Function, Partition Scheme
CREATE PARTITION FUNCTION PF_TEXT (varchar(20))
    AS RANGE RIGHT FOR VALUES ('A', 'B', 'C', 'D', 'E');

CREATE PARTITION SCHEME PS_TEXT
    AS PARTITION PF_TEXT ALL TO ([PRIMARY]);

CREATE TABLE [dbo].[fact_partitioned] (
    [Col1] [int] NOT NULL,
    [Col2] [int] NOT NULL,
    [TEXT] [varchar](20) NOT NULL
) ON PS_TEXT ([TEXT]);

---------------------------
DECLARE @cmd nvarchar(max);
DECLARE @partitions nvarchar(max);
DECLARE @partitionids nvarchar(max);

-- Comma-separated list of partition values handed in by the ETL process.
SET @partitions = N'A,B,C';

-- TODO: check whether each value in @partitions already exists in the partition function.
-- TODO: for @partitions values that don't exist, iterate them and add each to the partition function.
-- TODO: iterate @partitions values over $PARTITION.PF_TEXT(<value>) and append
--       each result to @partitionids as a comma-separated list.

-- Until the TODO steps assign @partitionids it is NULL, so this concatenation
-- yields NULL under the default CONCAT_NULL_YIELDS_NULL setting.
SET @cmd = N'TRUNCATE TABLE dbo.fact_partitioned WITH (PARTITIONS (' + @partitionids + N'));';
PRINT @cmd;
--EXEC sys.sp_executesql @cmd;
SQL Server 表分區本質上是範圍分區。儘管您可以用它實現列表分區(即分區列的值與邊界完全匹配),但必須確保邊界精確匹配,以避免在 SPLIT 操作過程中產生過多的數據移動和日誌記錄。考慮在事實表上創建一個與邊界值匹配的 CHECK 約束(例如 TEXT IN('A','B','C', …))。
以下是基於您問題中的 DDL 和範例數據的範例腳本,它假設事實表上(至少)存在一個聚集索引。我添加了一些額外的邊界,以說明在 ETL 過程中可以在現有邊界之間引入新邊界,而且只要分區列的值與邊界完全匹配,就無需移動數據。有關每個步驟的詳細資訊,請參閱內聯註釋。
在我的測試中,將分區從 staging 表切換(SWITCH)到事實表只需要幾秒鐘。請務必在之後更新事實表的統計資訊。
-- Example script: "list" partitioning implemented on top of RANGE RIGHT
-- partitioning, where every partitioning column value exactly matches a boundary.
--   Batch 1: create the partitioned fact table and load sample data.
--   Batch 2: create the staging table and load an incremental data set.
--   Batch 3: add missing boundaries, truncate the affected target partitions,
--            and switch the staged partitions into the fact table.

CREATE PARTITION FUNCTION PF_TEXT(varchar(20))
    AS RANGE RIGHT FOR VALUES ('A', 'B', 'C', 'D', 'E', 'M', 'S');

CREATE PARTITION SCHEME PS_TEXT
    AS PARTITION PF_TEXT ALL TO ([PRIMARY]);

CREATE TABLE dbo.fact_partitioned(
      Col1 int NOT NULL
    , Col2 int NOT NULL
    , TEXT varchar(20) NOT NULL INDEX cdx CLUSTERED ON PS_TEXT (TEXT)
) ON PS_TEXT (TEXT);

--load some example data into fact table (1,000,000 rows per boundary value)
WITH
     t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
    ,t1k AS (SELECT 0 AS n FROM t10 AS a CROSS JOIN t10 AS b CROSS JOIN t10 AS c)
    ,t1g AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num FROM t1k AS a CROSS JOIN t1k AS b CROSS JOIN t1k AS c)
INSERT INTO dbo.fact_partitioned WITH(TABLOCKX) (Col1, Col2, TEXT)
SELECT 1,1,'A' FROM t1g WHERE num <= 1000000
UNION ALL
SELECT 1,1,'B' FROM t1g WHERE num <= 1000000
UNION ALL
SELECT 1,1,'C' FROM t1g WHERE num <= 1000000
UNION ALL
SELECT 1,1,'D' FROM t1g WHERE num <= 1000000
UNION ALL
SELECT 1,1,'E' FROM t1g WHERE num <= 1000000
UNION ALL
SELECT 1,1,'M' FROM t1g WHERE num <= 1000000
UNION ALL
SELECT 1,1,'S' FROM t1g WHERE num <= 1000000;
GO

--example ETL objects and data
DROP TABLE IF EXISTS dbo.fact_partitioned_staging;

--staging table must have same indexes as target (aligned)
CREATE TABLE dbo.fact_partitioned_staging(
      Col1 int NOT NULL
    , Col2 int NOT NULL
    , TEXT varchar(20) NOT NULL INDEX cdx CLUSTERED ON PS_TEXT (TEXT)
) ON PS_TEXT (TEXT);

WITH
     t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
    ,t1k AS (SELECT 0 AS n FROM t10 AS a CROSS JOIN t10 AS b CROSS JOIN t10 AS c)
    ,t1g AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num FROM t1k AS a CROSS JOIN t1k AS b CROSS JOIN t1k AS c)
INSERT INTO dbo.fact_partitioned_staging WITH(TABLOCKX) (Col1, Col2, TEXT)
--replaces existing partition
SELECT 2,2,'C' FROM t1g WHERE num <= 1000000
UNION ALL
--new partition BETWEEN 'E' and 'M'
SELECT 2,2,'G' FROM t1g WHERE num <= 1000000
UNION ALL
--new partition AFTER 'S'
SELECT 2,2,'X' FROM t1g WHERE num <= 1000000;
GO

--dynamic list partitioning load
SET XACT_ABORT ON; --best practice with explicit transactions
DECLARE @SQL nvarchar(MAX);

BEGIN TRY

    BEGIN TRAN;

    --get distinct incremental load TEXT values from staging
    DECLARE @incremental_load_boundaries TABLE(
        incremental_load_boundary varchar(20) NOT NULL PRIMARY KEY
    );
    INSERT INTO @incremental_load_boundaries(incremental_load_boundary)
    SELECT DISTINCT TEXT
    FROM dbo.fact_partitioned_staging;

    --recreate new partition function/scheme with existing partition boundaries plus new distinct TEXT values
    --(the drops fail if a table still uses PS_TEXT_STAGING; the setup batch above recreates the staging table on PS_TEXT)
    IF EXISTS(SELECT 1 FROM sys.partition_schemes WHERE name = N'PS_TEXT_STAGING')
        DROP PARTITION SCHEME PS_TEXT_STAGING;
    IF EXISTS(SELECT 1 FROM sys.partition_functions WHERE name = N'PF_TEXT_STAGING')
        DROP PARTITION FUNCTION PF_TEXT_STAGING;

    --cast the aggregated expression to nvarchar(MAX): otherwise STRING_AGG is limited to 8000 bytes
    SELECT @SQL =
          N'CREATE PARTITION FUNCTION PF_TEXT_STAGING(varchar(20)) AS RANGE RIGHT FOR VALUES ('
        + STRING_AGG(CAST(QUOTENAME(boundary, '''') AS nvarchar(MAX)), N',')
        + N');'
    FROM (
        --existing target table boundaries
        SELECT CAST(value AS varchar(20)) AS boundary
        FROM sys.partition_range_values
        WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT')
        UNION
        --staging table boundaries
        SELECT incremental_load_boundary
        FROM @incremental_load_boundaries
    ) AS boundaries;
    PRINT @SQL;
    EXECUTE sp_executesql @SQL;

    CREATE PARTITION SCHEME PS_TEXT_STAGING
        AS PARTITION PF_TEXT_STAGING ALL TO ([PRIMARY]);

    --partition staging table using new partition scheme with end state boundaries
    CREATE CLUSTERED INDEX cdx ON dbo.fact_partitioned_staging(TEXT)
        WITH(DROP_EXISTING=ON)
        ON PS_TEXT_STAGING(TEXT);

    --SPLIT existing partition function to add new boundaries
    --no data movement needed when existing partition column values exactly match existing boundaries
    SELECT @SQL = STRING_AGG(
          CAST(
                N'ALTER PARTITION SCHEME PS_TEXT NEXT USED [PRIMARY];ALTER PARTITION FUNCTION PF_TEXT() SPLIT RANGE('
              + QUOTENAME(new_boundaries.boundary, '''')
              + N')'
            AS nvarchar(MAX))
        , N';')
    FROM (
        --all boundaries (existing target boundaries plus incremental load values)
        SELECT CAST(value AS varchar(20)) AS boundary
        FROM sys.partition_range_values
        WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT_STAGING')
        EXCEPT
        --existing target table boundaries
        SELECT CAST(value AS varchar(20)) AS boundary
        FROM sys.partition_range_values
        WHERE function_id = (SELECT function_id FROM sys.partition_functions WHERE name = N'PF_TEXT')
    ) AS new_boundaries;

    --STRING_AGG yields NULL when every staged value already has a boundary: nothing to split
    IF @SQL IS NOT NULL
    BEGIN
        PRINT @SQL;
        EXECUTE sp_executesql @SQL;
    END;

    --truncate partitions to be reloaded
    SELECT @SQL =
          N'TRUNCATE TABLE dbo.fact_partitioned WITH(PARTITIONS('
        + STRING_AGG(CAST(partition_number AS nvarchar(MAX)), N',')
        + N'));'
    FROM (
        SELECT DISTINCT $PARTITION.PF_TEXT(incremental_load_boundary) AS partition_number
        FROM @incremental_load_boundaries
    ) AS existing_boundaries;

    --@SQL is NULL when the staging table had no rows: nothing to truncate
    IF @SQL IS NOT NULL
    BEGIN
        PRINT @SQL;
        EXECUTE sp_executesql @SQL;
    END;

    --switch in incremental load partitions
    --(partition numbers match on both sides because PF_TEXT now has the same boundaries as PF_TEXT_STAGING)
    SELECT @SQL = STRING_AGG(
          CAST(
                N'ALTER TABLE dbo.fact_partitioned_staging SWITCH PARTITION '
              + CAST(partition_number AS varchar(10))
              + N' TO dbo.fact_partitioned PARTITION '
              + CAST(partition_number AS varchar(10))
            AS nvarchar(MAX))
        , N';') + N';'
    FROM (
        SELECT DISTINCT $PARTITION.PF_TEXT(incremental_load_boundary) AS partition_number
        FROM @incremental_load_boundaries
    ) AS existing_boundaries;

    --@SQL is NULL when the staging table had no rows: nothing to switch
    IF @SQL IS NOT NULL
    BEGIN
        PRINT @SQL;
        EXECUTE sp_executesql @SQL;
    END;

    --assert staging table is now empty
    IF EXISTS(SELECT 1 FROM dbo.fact_partitioned_staging)
        RAISERROR('Assertion failed: staging table is not empty',16,1);

    COMMIT;

END TRY
BEGIN CATCH

    --roll back only if a transaction is still open, then re-raise the original error
    IF @@TRANCOUNT > 0 ROLLBACK;
    THROW;

END CATCH;
GO