Sql-Server

數據倉庫:使用動態 Sql 為任何表自動生成漸變維度程式碼

  • January 2, 2022

有人可以通過程式碼審查來獲取 OLTP/Stage 數據並將其轉換為數據倉庫 Kimball 緩慢變化的維度類型 2 嗎?此程式碼應該是動態的,並且應該能夠使用任何表格作為輸入。

我一直在網上搜尋類似於 Ola Hallengren、Brent Ozar 或開源的程式碼,以將數據表述為緩慢變化的維度。SQL 臨時表僅適用於 ETL 日期,而不是業務日期(而且我有積壓的數據)。一些 OLTP 供應商不允許在其結構上使用臨時表。

我們實際上有 100 數百張表需要放入漸變維度中。數據有重複或重複,其他時候列需要重命名/或從緩慢變化的維度中排除。搜尋自動程式碼生成器。將它們全部編碼需要很長時間。

有人可以做得更好,更多參數化,改進和增強(希望開源)。在這方面工作了幾個星期。隨意嘗試/編輯,或在下面寫一個新答案。

create procedure dbo.Dim_Type2_GenerateCode
   @TableNameSource varchar(255),
   @TableSourceLoadDate varchar(255) = 'LoadDatetime',

   @NaturalKey varchar(255),
   @NaturalBeginDateChange varchar(255),
   @NaturalEndDateChange varchar(255) = null,

   @RepeatedDataFlag bit = 0,
   @TempTableFlag bit = 0,
   @ColumnExcludeList varchar(max) = null

as

set nocount on

-- Purpose:  Generate slowly changing dimensions for any table
-- Note:
-- In case of Column Renames or joins between Staging and Dimension, utilize MapVw, eg FoodMapVw, CustomerMapVw, ProductMapVw
-- To Include only certain columns between Staging and Dimension, utilize MapVw, eg FoodMapVw, CustomerMapVw, ProductMapVw
-- To Load data into smaller temp table to prevent querying repeatedly, utilize @TempTableFlag = 1
-- Column Exclusion parameter can be added to exclude certain columns
-- Works with three table types below: Transaction Dates, Tables with Beg/EndDates already, Tables with RepeatedData

--      ProductId ProductName  TransactionDate      ProductId    ProductName   TransactionDate  EndEffDate           ProductId     ProductName   TransactionDate (Repeated)
--         1      Apple         4/1/2018              1             Apple         4/1/2018       4/3/2018               1              Apple           4/1/2018
--         1      Apple         4/3/2018              1             Apple         4/3/2018       4/7/2018               1              Apple           4/1/2018
--         1      Apple         4/7/2018              1             Apple         4/12/2018      12/31/999              1              Apple           4/2/2018
--                                                                                                                      1              Apple           4/3/2018

set @TableNameSource = right(@TableNameSource, len(@TableNameSource) - charindex('.', @TableNameSource))
declare @StageTableName varchar(255)
declare @DimTableName varchar(255)
set @TempTableFlag = (case when @NaturalBeginDateChange is null or @NaturalEndDateChange is null or @RepeatedDataFlag = 1 then 1 else 0 end)



DECLARE @ColumnExcludeTable table(ColumnExcludeValue varchar(500) not null);
insert into @ColumnExcludeTable(ColumnExcludeValue)
select ltrim(rtrim(value)) as ColumnExcludeValue from string_split(@ColumnExcludeList, ',');



declare @TempTableDeclare varchar(max) = 'create table #'+@TableNameSource+
'
( 
   ' + @TableNameSource + '_id bigint primary key identity(1,1),' + 
   (select STUFF((
   SELECT ', 
   '    
   + c.name + ' ' + 
case 
   when t.name like '%char%' then t.name + '(' + cast(c.max_length as varchar(10)) + ')' 
   when t.name like '%numeric%' or t.name like '%decimal%' then t.name + '(' + cast(c.precision as varchar(10)) + ',' + cast(c.scale as varchar(10)) + ')'
   else t.name
end
FROM .sys.columns c 
inner JOIN sys.types t
   on t.user_type_id = c.user_type_id
   and t.system_type_id = c.system_type_id
where c.object_id = object_id(@TableNameSource) and is_identity = 0
   and c.name not like '%@NaturalKey%' 
   and c.name not like '%EndEffDate%' 
   and c.name <> @NaturalBeginDateChange
   and c.name not in (select columnexcludetable.ColumnExcludeValue from @ColumnExcludeTable columnexcludetable)
FOR XML PATH(''), TYPE).value('.','nvarchar(max)'),1,2,''))
+ ' 
   ,BegEffDatetime datetime
   ,EndEffDatetime datetime
)'  


declare @TempTableCodeInsert varchar(max)=
   (select STUFF((
   SELECT ', 
   '    
   + QUOTENAME(c.name) 
FROM .sys.columns c 
inner JOIN sys.types t
   on t.user_type_id = c.user_type_id
   and t.system_type_id = c.system_type_id
where c.object_id = object_id(@TableNameSource) and is_identity = 0
   and c.name not like '%BegEffDate%' 
   and c.name not like '%EndEffDate%' 
   and c.name <> @NaturalBeginDateChange
   and c.name not in (select columnexcludetable.ColumnExcludeValue from @ColumnExcludeTable columnexcludetable)
FOR XML PATH(''), TYPE).value('.','nvarchar(max)'),1,2,''))

declare @TempTableCodeInsertNoLoadDate varchar(max) = (replace(@TempTableCodeInsert,quotename(@TableSourceLoadDate),''))
set @TempTableCodeInsertNoLoadDate = LEFT(@TempTableCodeInsertNoLoadDate, LEN(@TempTableCodeInsertNoLoadDate)-5)
--set @TempTableCodeInsertNoLoadDate = (replace(@TempTableCodeInsertNoLoadDate,quotename(@NaturalBeginDateChange),''))


declare @ColumnListNoPrimary varchar(max) = 
   (select STUFF((
   SELECT ', 
       '    
   + QUOTENAME(c.name) 
   FROM .sys.columns c 
   where c.object_id = object_id(@TableNameSource) and is_identity = 0
   FOR XML PATH(''), TYPE).value('.','nvarchar(max)'),1,2,''))


declare @ColumnListNoPrimaryNoTimeStamp varchar(max) = 
   (select STUFF((
   SELECT ', 
       '    
   + QUOTENAME(c.name) 
   FROM .sys.columns c 
   where c.object_id = object_id(@TableNameSource) and is_identity = 0
   and c.name <> @NaturalKey
   and c.name not like '%BeginEffDate%' 
   and c.name not like '%EndEffDate%' 
   and c.name not like '%CreateDatetime%' 
   and c.name not like '%UpdateDatetime%'
   and c.name not like '%Loaddate%'
   and c.name <> @NaturalBeginDateChange
   and c.name not in (select columnexcludetable.ColumnExcludeValue from @ColumnExcludeTable columnexcludetable)
   FOR XML PATH(''), TYPE).value('.','nvarchar(max)'),1,2,''))



declare @ColumnListNoPrimaryCompareCheck varchar(max) = 
   (select STUFF((
   SELECT ' or
       '    
   + 'stg.'+ QUOTENAME(c.name) + ' <> dim.' + QUOTENAME(c.name) 
   FROM .sys.columns c 
   where c.object_id = object_id(@TableNameSource) and is_identity = 0
   and c.name <> @NaturalKey
   and c.name not like '%BeginEffDate%' 
   and c.name not like '%EndEffDate%' 
   and c.name not like '%CreateDatetime%' 
   and c.name not like '%UpdateDatetime%'
   and c.name not like '%Loaddate%'
   and c.name <> @NaturalBeginDateChange
   and c.name not in (select columnexcludetable.ColumnExcludeValue from @ColumnExcludeTable columnexcludetable)
   FOR XML PATH(''), TYPE).value('.','nvarchar(max)'),1,3,''))



set @StageTableName = REPLACE(@TableNameSource,'Dim','Stage')
set @StageTableName = (case when @TempTableFlag = 1 then '#' + @StageTableName else 'dbo.' + @StageTableName end)

set @DimTableName = REPLACE(@TableNameSource,'Stage','Dim')
set @DimTableName = REPLACE(@DimTableName,'MapVw','')
set @DimTableName = (case when left(@DimTableName,3) <> 'Dim' then 'Dim_' + @DimTableName else @DimTableName end)

set @NaturalBeginDateChange = (case when @TempTableFlag = 1 then @NaturalBeginDateChange  else @NaturalBeginDateChange end)
declare @NaturalBeginDateChangeRename varchar(255) = (case when @TempTableFlag = 1 then 'BegEffDatetime'  else @NaturalBeginDateChange end)



declare @TableListGenerateCode nvarchar(max) = 

'create procedure dbo.' + @DimTableName +'Update
   @LoadDatetimeParam datetime
as

declare @NewDatetime datetime = getdate() 

'
+ 

'--Declare temp data
' + 
case when @TempTableFlag = 1 

then 
+ @TempTableDeclare + '

insert into ' + @StageTableName + 
'
(       
' + @TempTableCodeInsert + '
   ,BegEffDatetime
   ,EndEffDatetime
)' + 
case when @RepeatedDataFlag = 1
then 
'select 
   ' + replace(@TempTableCodeInsert,quotename(@TableSourceLoadDate),'min('+quotename(@TableSourceLoadDate)+')') + '
   ,min(' + @NaturalBeginDateChange + ') as BegEffDatetime  
   ,case when max(stg.' + @NaturalBeginDateChange + ') = (select max(substage.' + @NaturalBeginDateChange + ') from ' + @TableNameSource + ' substage where stg.'+ @NaturalKey + ' = substage.' + @NaturalKey + ') then ''12/31/9999'' else max(stg.' + @NaturalBeginDateChange + ') end as EndDate
from ' + @TableNameSource + ' stg
where LoadDatetime > @LoadDatetimeParam
group by ' + @TempTableCodeInsertNoLoaddate
when @RepeatedDataFlag = 0 
then
'
select
   distinct'
+  @TempTableCodeInsert + '
   ,' + @NaturalBeginDateChange + ' as BegEffDatetime
   ,ISNULL(Lead(' + @NaturalBeginDateChange + ' + 1) Over (partition by ' + @NaturalKey + ' order by ' + @NaturalBeginDateChange + ' asc), ''12/31/9999'') as EndEffDatetime'


+ '
from ' + @TableNameSource +
'
where LoadDatetime > @LoadDatetimeParam'
end
else 
''
end 
+ ' 
--End temp data section '  + '


--Begin Transaction

begin transaction

   --Close off existing records that have changed

   update dbo.' + @DimTableName + '
   set
       UpdateDatetime = @NewDatetime,
       EndEffDatetime = (select min(' + @NaturalBeginDateChangeRename + ') from ' + @StageTableName +' substage where stg.' + @NaturalKey + ' = substage.' + @NaturalKey + ')
   from dbo.' + @DimTableName + ' dim
   inner join ' + @StageTableName + ' stg
       on dim.' + @NaturalKey + ' = stg.' + @NaturalKey + '
       and dim.EndEffDatetime = ''12/31/9999''
   where 
   stg.LoadDatetime > @LoadDatetimeParam
   and ('
   + @ColumnListNoPrimaryCompareCheck +


   ')
   --Insert new updated records

   insert into dbo.' + @DimTableName + 
   '
   (   
       ' + @NaturalKey  + ',
       ' + @ColumnListNoPrimaryNoTimeStamp + ',' +
       '
       [CreateDatetime],
       [UpdateDatetime],
       [BegEffDatetime],
       [EndEffDatetime]
   )
   select  
       stg.' + @NaturalKey +',' 
       + replace(@ColumnListNoPrimaryNoTimeStamp,'[','stg.[') + 
       '
       ,@newdatetime as CreateDatetime
       ,@newdatetime as UpdateDatetime
       ,stg.BegEffDatetime as BegEffDatetime
       ,stg.EndEffDatetime as EndEffDatetime
   from '  + @StageTableName + ' stg
   inner join ' + @DimTableName + ' dim
       on dim.' + @NaturalKey + ' = stg.' + @NaturalKey + '
       and dim.UpdateDatetime = @NewDatetime
   --Verify Updates
   where 
       stg.LoadDatetime > @LoadDatetimeParam
       and ('
   + @ColumnListNoPrimaryCompareCheck + ')

   --Insert New Business Key records which do not exist

   UNION ALL
       select
       stg.' + @NaturalKey +',
       ' + replace(@ColumnListNoPrimaryNoTimeStamp,'[','stg.[') + 
       '
       ,@newdatetime as CreateDatetime
       ,@newdatetime as UpdateDatetime
       ,stg.BegEffDatetime as BegEffDatetime
       ,''12/31/9999'' as EndEffDate
   from '  + @StageTableName + ' stg
   left join dbo.' + @DimTableName + ' dim
       on dim.' + @NaturalKey + ' = stg.' + @NaturalKey + '
   where dim.' + @NaturalKey + ' is null   

commit transaction

--end code'

-- Print columns in three steps, print and select only can print first 8000 characters
print substring(@TableListGenerateCode,charindex('--Create procedure',@TableListGenerateCode),charindex('--Declare temp data',@TableListGenerateCode))
print substring(@TableListGenerateCode,charindex('--Declare temp data',@TableListGenerateCode),charindex('--End temp data',@TableListGenerateCode)-charindex('--Declare temp data',@TableListGenerateCode))
print substring(@TableListGenerateCode,charindex('--Begin transaction',@TableListGenerateCode),charindex('--Close off existing records',@TableListGenerateCode)-charindex('--Begin Transaction',@TableListGenerateCode))
print  '    ' + substring(@TableListGenerateCode,charindex('--Close off existing records',@TableListGenerateCode),charindex('--Insert new updated records',@TableListGenerateCode)-charindex('--Close off existing records',@TableListGenerateCode))
print  '    ' + substring(@TableListGenerateCode,charindex('--Insert new updated records',@TableListGenerateCode),charindex('--Verify updates',@TableListGenerateCode) -charindex('--Insert new updated records',@TableListGenerateCode) )
print  '    ' + substring(@TableListGenerateCode,charindex('--Verify updates',@TableListGenerateCode),charindex('--Insert New Business',@TableListGenerateCode) -charindex('--Verify updates',@TableListGenerateCode) )
print  '    ' + substring(@TableListGenerateCode,charindex('--Insert New Business',@TableListGenerateCode),charindex('--end code',@TableListGenerateCode)-charindex('--Insert New Business',@TableListGenerateCode))

測試場景: 員工在雜貨店不斷貼錯標籤。跟踪歷史。

drop table [dbo].[Stage_Food]
create table [dbo].[Stage_Food]
(
   [Stagefoodid] int identity(1,1),
   [FoodNaturalId] [int] NOT NULL,
   [FoodName] [varchar](255) NULL,
   [FoodCategory] [varchar](255) NULL,
   [FoodTransactionDate] datetime,
   [LoadDatetime] datetime
   primary key clustered ([Stagefoodid] ASC)
)

drop table dbo.[Dim_food]
create table [dbo].[Dim_food]
(
   [DimFoodId] [int] IDENTITY(1,1) NOT NULL,
   [FoodNaturalId] [int] NULL,
   [FoodName] [varchar](255) NULL,
   [FoodCategory] [varchar](255) NULL,
   [BegEffDatetime] [datetime] NULL,
   [EndEffDatetime] [datetime] NULL,
   [CreateDatetime] [datetime] NULL,
   [Updatedatetime] [datetime] NULL
   primary key clustered ( [DimFoodId] ASC)
) 


exec dbo.Dim_Type2_GenerateCode
   @TableNameSource = 'Stage_Food',
   @TableSourceLoadDate = 'LoadDatetime',

   @NaturalKey = 'FoodNaturalId',
   @NaturalBeginDateChange = 'FoodTransactionDate'



insert into dbo.Stage_Food
values (1,'Apple','Vegetable','5/2/2018','5/4/2018')

exec dbo.Dim_FoodUpdate '5/3/2018'

select * from dbo.Dim_food


insert into dbo.Stage_Food
values (2,'Bread','Meat','5/3/2018','5/5/2018')

exec dbo.Dim_FoodUpdate '5/4/2018'

select * from dbo.Dim_food

insert into dbo.Stage_Food
values (1,'Apple','Candy','5/8/2018','5/9/2018')

dbo.Dim_FoodUpdate '5/8/2018'

select * from dbo.Dim_food


insert into dbo.Stage_Food
values (2,'Bread','Grain','5/8/2018','5/10/2018')


dbo.Dim_FoodUpdate '5/9/2018'

select * from dbo.Dim_food


---------------------------------------
-- Test2 Repeated data flag
---------------------------------------


exec dbo.Dim_Type2_GenerateCode
   @TableNameSource = 'Stage_Food',
   @TableSourceLoadDate = 'LoadDatetime',

   @NaturalKey = 'FoodNaturalId',
   @NaturalBeginDateChange = 'FoodTransactionDate',
   @RepeatedDataFlag = 1,
   @TempTableFlag  = 1


insert into dbo.Stage_Food
values (1,'Apple','Vegetable','5/2/2018','5/4/2018'),
(1,'Apple','Vegetable','5/2/2018','5/4/2018')

exec dbo.Dim_FoodUpdate '5/3/2018'

select * from dbo.Dim_food


insert into dbo.Stage_Food
values (2,'Bread','Meat','5/2/2018','5/5/2018'),
(2,'Bread','Meat','5/3/2018','5/5/2018')



exec dbo.Dim_FoodUpdate '5/4/2018'

select * from dbo.Dim_food

insert into dbo.Stage_Food
values (1,'Apple','Candy','5/6/2018','5/9/2018'),
(1,'Apple','Candy','5/7/2018','5/9/2018'),
(1,'Apple','Candy','5/8/2018','5/9/2018')

dbo.Dim_FoodUpdate '5/8/2018'

select * from dbo.Dim_food


insert into dbo.Stage_Food
values (2,'Bread','Grain','5/8/2018','5/10/2018')


dbo.Dim_FoodUpdate '5/9/2018'

select * from dbo.Dim_food

您可以測試更多項目、臨時表標誌、列排除列表、映射視圖

為此目的,有大量的 ETL 框架,儘管我懷疑它們中的大多數是專有的,或者特定於一種類型的數據源。

您可能想查看 Biml,它允許您在 SSIS 中動態建構 ETL 流。

尋找自動程式碼生成器

您想要一個基於模板的程式碼生成器。

我只知道 Oracle 的那些。但是,長期目標是支持多個 RDBMS。將這些視為真正需要的“起點”。

這些在 GitHub 上

  • FTLDB基於 Apache FTL 項目。這可能是你最好的起點。
  • oddgen是 SQLDeveloper 的外掛。它使用 eXtend 作為內部程式碼生成器。
  • tePLSQL是一個純 PL/SQL 程式碼生成器。我正在更新這個並且很想與想要將它翻譯成 T-SQL 的人一起工作。

編輯:附加說明

看起來您可以將客戶端版本FTLDB用於任何具有 JDBC 驅動程序的 RDBMS。我還沒有嘗試過。

引用自:https://dba.stackexchange.com/questions/224367