您應該多久更新一次統計數據?
您應該多久更新一次統計數據?什麼是“太少”?“太頻繁”有多頻繁?
答案是“這取決於”您的數據庫、使用者、數據等。
所以我試著在兩張表中記錄我們的統計數據隨著時間的推移是什麼樣的。他們來了:
DROP TABLE /*IF EXISTS */ dbo.dm_db_stats_histogram DROP TABLE /*IF EXISTS */ dbo.dm_db_stats_properties go CREATE TABLE dbo.dm_db_stats_properties( dm_db_stats_propertiesID INT IDENTITY(1,1) NOT NULL constraint PK_dm_db_stats_properties PRIMARY KEY CLUSTERED, DatabaseId INT NOT NULL, object_id int NOT NULL, stats_id int NOT NULL, last_updated DATETIME2 NOT NULL, rows BIGINT NOT NULL, rows_sampled BIGINT NOT NULL, steps int NOT NULL, unfiltered_rows BIGINT NOT NULL, modification_counter BIGINT NOT NULL, persisted_sample_percent FLOAT NULL , SampleDate DATETIME2 NOT NULL CONSTRAINT df_dm_db_stats_properties_SampleDate DEFAULT SYSUTCDATETIME() ) GO ALTER TABLE dbo.dm_db_stats_properties ADD StatsName NVARCHAR(128) NOT NULL CONSTRAINT df_dm_db_stats_properties_StatsName DEFAULT ('') GO CREATE TABLE dbo.dm_db_stats_histogram( dm_db_stats_histogramID INT IDENTITY(1,1) NOT NULL constraint PK_dm_db_stats_histogram PRIMARY KEY CLUSTERED, dm_db_stats_propertiesID INT NOT NULL, object_id int NOT NULL, stats_id int NOT NULL, step_number int NOT NULL, range_high_key sql_variant NOT NULL, range_rows real NOT NULL, equal_rows real NOT NULL, distinct_range_rows bigint NOT NULL, average_range_rows REAL NOT NULL ) go ALTER TABLE dbo.dm_db_stats_histogram ADD CONSTRAINT fk_dm_db_stats_properties FOREIGN KEY(dm_db_stats_propertiesID) REFERENCES dbo.dm_db_stats_properties(dm_db_stats_propertiesID) ALTER TABLE dbo.dm_db_stats_histogram ALTER COLUMN range_high_key SQL_VARIANT NULL GO
這是我用來記錄統計數據的程式碼:
SET NOCOUNT ON BEGIN TRY DROP TABLE #Stat_Header END TRY BEGIN CATCH END CATCH CREATE TABLE #Stat_Header (Name sysname, Updated DATETIME, Rows BIGINT, Rows_Sampled BIGINT, Steps SMALLINT, Density REAL, AverageKeyLength INT, StringIndex varchar(10) , FilterExpression varchar(8000), unfiltered_rows bigint, persisted_sample_percent float) BEGIN TRY DROP TABLE #Histogram END TRY BEGIN CATCH END CATCH CREATE TABLE #Histogram (Step_Number INT IDENTITY(1,1), range_high_key SQL_VARIANT, range_rows REAL NOT NULL, equal_rows REAL NOT NULL, distinct_range_rows BIGINT NOT NULL, average_range_rows REAL NOT NULL) DECLARE TableCursor CURSOR LOCAL STATIC FOR SELECT t.name AS TableName, sc.name AS SchemaName FROM sys.tables t INNER JOIN sys.schemas sc ON sc.schema_id = t.schema_id ORDER BY sc.name, t.name DECLARE @sql NVARCHAR(MAX) = '', @TableName VARCHAR(100), @SchemaName VARCHAR(100), @loopCounter INT =0 SELECT GETDATE() AS StartDate OPEN TableCursor WHILE 1 =1 BEGIN FETCH TableCursor INTO @TableName, @SchemaName IF @@fetch_status <> 0 BREAK SELECT @sql = 'declare @Scope_Identity int = 0, @RowCount int SET NOCOUNT ON' + CHAR(13) SELECT @sql += ' TRUNCATE TABLE #Stat_Header TRUNCATE TABLE #Histogram INSERT INTO #Stat_Header(Name, Updated, Rows, Rows_Sampled, Steps, Density, AverageKeyLength, StringIndex, FilterExpression, unfiltered_rows/*, persisted_sample_percent*/) exec (''DBCC SHOW_STATISTICS ("' + @SchemaName + '.' + @TableName + '", "' + s.name +'") with STAT_HEADER'') INSERT INTO dbo.dm_db_stats_properties(databaseid, object_id, stats_id, last_updated, rows, rows_sampled, steps, unfiltered_rows, modification_counter, persisted_sample_percent, SampleDate, StatsName) SELECT db_id(), ' + LTRIM(t.object_id) + ', ' + LTRIM(s.stats_id) +', coalesce(sh.Updated, ''2000-01-01''), isnull(sh.rows,0), isnull(sh.Rows_Sampled,0), isnull(sh.steps,0), isnull(sh.unfiltered_rows,0), 0, sh.persisted_sample_percent, cast(''' + LTRIM(SYSUTCDATETIME()) + ''' as datetime2(7)), ''' + s.name + ''' FROM #Stat_Header sh LEFT JOIN dbo.dm_db_stats_properties sp ON sp.object_id=' + LTRIM(t.object_id) + ' AND sp.stats_id=' + LTRIM(s.stats_id) + ' AND sh.Updated=sp.last_updated WHERE sp.dm_db_stats_propertiesID IS NULL SELECT @Scope_Identity = SCOPE_IDENTITY(), @RowCount=@@ROWCOUNT IF @RowCount>0 BEGIN --raiserror (''here'', 10, 1) with nowait INSERT INTO #Histogram(range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows) exec (''DBCC SHOW_STATISTICS ("' + @SchemaName + '.' + @TableName + '", "' + s.name +'") with HISTOGRAM'') INSERT INTO dbo.dm_db_stats_histogram(dm_db_stats_propertiesID, object_id, stats_id, step_number, range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows) SELECT @Scope_Identity, ' + LTRIM(t.object_id) + ', ' + LTRIM(s.stats_id) +', h.Step_Number, h.range_high_key, h.range_rows, h.equal_rows, h.distinct_range_rows, h.average_range_rows FROM #Histogram h END raiserror (''table = ' + @TableName + ', ' + s.name + ', rc= %i '', 10, 1, @RowCount) with nowait waitfor delay ''00:00:01'' ' FROM sys.stats AS s INNER JOIN sys.tables t ON t.object_id = s.object_id INNER JOIN sys.schemas sc ON sc.schema_id = t.schema_id WHERE t.name=@TableName AND sc.name = @SchemaName IF @loopCounter < 1 EXEC dbo.LongPrint @String=@sql SET @loopCounter +=1 EXEC sp_executesql @sql --BREAK END DEALLOCATE TableCursor SELECT GETDATE() AS StopDate
我還有一個在 SQL Server 2016+17 上使用新 DMV 的解決方案
exec sp_foreachdb @command = N' use ? DECLARE @SampleDate DATETIME2 = SYSUTCDATETIME() INSERT INTO master.dbo.dm_db_stats_properties(DatabaseID, object_id, stats_id, last_updated, rows, rows_sampled, steps, unfiltered_rows, modification_counter, persisted_sample_percent, SampleDate, StatsName) SELECT db_id() as DatabaseID, s.object_id, s.stats_id, sp.last_updated, sp.rows, sp.rows_sampled, sp.steps, sp.unfiltered_rows, sp.modification_counter, sp.persisted_sample_percent, @SampleDate, s.name FROM ?.sys.stats AS s INNER JOIN ?.sys.tables t ON t.object_id = s.object_id INNER JOIN ?.sys.schemas sc ON sc.schema_id = t.schema_id CROSS APPLY ?.sys.dm_db_stats_properties(s.object_id, s.stats_id) AS sp LEFT JOIN master.dbo.dm_db_stats_properties T1 ON T1.object_id = s.object_id AND T1.stats_id = s.stats_id AND T1.last_updated=sp.last_updated WHERE sp.last_updated IS NOT NULL AND T1.last_updated IS NULL select @@rowcount as r1 INSERT INTO master.dbo.dm_db_stats_histogram(dm_db_stats_propertiesID, object_id, stats_id, step_number, range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows) SELECT sp.dm_db_stats_propertiesID, sp.object_id, sp.stats_id, hist.step_number, hist.range_high_key, hist.range_rows, hist.equal_rows, hist.distinct_range_rows, hist.average_range_rows FROM master.dbo.dm_db_stats_properties sp CROSS APPLY ?.sys.dm_db_stats_histogram(sp.[object_id], sp.stats_id) AS hist WHERE sp.SampleDate = @SampleDate select @@rowcount as r2 ', @exclude_list='tempdb, model', @print_dbname=1
我真正的問題
現在,我如何根據收集的數據編寫查詢,這將向我展示
a) 在我的採樣期間哪些表、索引、列(不)發生很大變化?
b) 哪些表、索引、列受益於 WITH FULLSCAN 命令?
b) 哪些表、索引、列受益於 WITH FULLSCAN 命令?
從我的角度來看,您沒有收集正確的數據來回答這個問題。如果您正在尋找僅通過分析數據庫統計資訊就可以做出的改進,我只能想到使用抽樣統計資訊而不是可能導致的兩個查詢性能問題
FULLSCAN
:
- 密度至少下降了一個數量級。
某些數據分佈不適合 SQL Server 在將採樣數據轉換為直方圖時所做的某些假設。在這些情況下,您最終可能會得到 10 倍、100 倍甚至更多的密度。這可能會導致在統計對像中使用密度向量的查詢出現性能問題。
您可以通過保存所有採樣統計資訊的密度資訊、使用 FULLSCAN 收集所有相關列的統計資訊以及比較兩個結果集之間的密度來搜尋可能的問題。任何太不准確的東西都是從完整收集統計數據中受益的候選者。 2. 該查詢容易受到升序鍵問題的影響。
您已將 SQL Server 2008 列為標記,因此這仍然可能與您相關。考慮一個在插入行時儲存日期時間的列。如果您有在該列上過濾以查找最近數據的查詢,他們可能正在搜尋直方圖之外的數據。使用舊版 CE,您最終可能會得到非常低的基數估計,這可能會導致查詢性能問題。
這可以通過 FULLSCAN 統計數據來解決,儘管對我來說感覺有點矯枉過正。您可以使用相關數據類型(希望無需擔心 VARCHAR 的升序鍵)在所有統計資訊上完整收集統計資訊,並查看最大高鍵如何變化。
對於上述兩個問題,我想不出一種僅通過查看抽樣統計數據以程式方式找到它們的方法。這就是為什麼我說你沒有收集正確的數據來回答你的問題。
如果您關心我的意見,真正最小化統計維護的方法是分析性能不佳的查詢的工作負載,仔細分析根本原因以確定統計問題何時起作用,確定準確的統計類型問題,最後相應調整統計維護工作。
我自己的步驟
我在這方面花了很多時間,但到目前為止最有用的是一個查詢,它向我顯示了步驟數差異最大的表/列/索引:
drop table /* if exists */ #inv create table #inv(dm_db_stats_propertiesID int, DatabaseID int, object_id int, stats_id int, Steps varchar(100), dm_db_stats_propertiesIDs varchar(400) , DatabaseName sysname, StatsName varchar(100) null, TableName varchar(100) null, ColumnName varchar(100) null, minSteps int, maxSteps int) insert into #inv( dm_db_stats_propertiesID, DatabaseID, object_id, stats_id, Steps, dm_db_stats_propertiesIDs, DatabaseName, StatsName, minSteps, maxSteps) SELECT dm_db_stats_propertiesID, DatabaseID, object_id, stats_id, Steps, dm_db_stats_propertiesIDs, a.Name as DatabaseName, StatsName, minSteps, maxSteps from ( SELECT top (10000) L.Name, sp.* , stuff((select ', ' +ltrim(steps) from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id for xml path('')),1,2,'') as Steps2 , stuff((select ', ' + ltrim(dm_db_stats_propertiesID) from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id for xml path('')),1,2,'') as dm_db_stats_propertiesIDs , (select count(*) as rowcnt from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id ) as rowcnt , (select min(Steps) as minSteps from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id ) as minSteps , (select max(Steps) as maxSteps from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id ) as maxSteps FROM dbo.dm_db_stats_properties sp inner join #List L on sp.DatabaseID=L.DatabaseID and sp.SampleDate>=L.SampleDate ) as a where a.Steps2 like '%,%' --',%' exec sp_foreachdb 'update #inv set TableName = t.name /* select * */ from #inv i inner join ?.sys.tables t on i.object_id = t.object_id and (''['' + i.DatabaseName + '']'') = ''?''' exec sp_foreachdb 'update #inv set ColumnName = c.name from #inv i inner join ?.sys.columns c on i.object_id=c.object_id and c.column_id = convert(int, convert(varbinary, SUBSTRING(i.StatsName, 9, 8),2)) and (''['' + i.DatabaseName + '']'') = ''?'' where i.StatsName like ''_WA_Sys%'' ' select * , (0.0+maxSteps-minSteps)/maxSteps * 100 as PctChange from #inv where minSteps <> maxSteps and DatabaseName <> 'master' AND TableName NOT LIKE 'dm_db_stats%' order by PctChange desc
幾乎不是我所希望的。