Sql-Server

您應該多久更新一次統計數據?

  • June 25, 2019

您應該多久更新一次統計數據?什麼是“太少”?“太頻繁”有多頻繁?

答案是“這取決於”您的數據庫、使用者、數據等。

所以我試著在兩張表中記錄我們的統計數據隨著時間的推移是什麼樣的。他們來了:

DROP TABLE /*IF EXISTS */ dbo.dm_db_stats_histogram
DROP TABLE /*IF EXISTS */ dbo.dm_db_stats_properties
go
CREATE TABLE dbo.dm_db_stats_properties(
 dm_db_stats_propertiesID INT IDENTITY(1,1) NOT NULL constraint PK_dm_db_stats_properties PRIMARY KEY CLUSTERED,
 DatabaseId INT NOT NULL,
 object_id int NOT NULL,
 stats_id  int NOT NULL,
 last_updated  DATETIME2 NOT NULL,
 rows  BIGINT NOT NULL,
 rows_sampled  BIGINT NOT NULL,
 steps int NOT NULL,
 unfiltered_rows   BIGINT NOT NULL,
 modification_counter  BIGINT NOT NULL,
 persisted_sample_percent  FLOAT  NULL
 , SampleDate DATETIME2 NOT NULL CONSTRAINT df_dm_db_stats_properties_SampleDate DEFAULT SYSUTCDATETIME()
)
GO
ALTER TABLE dbo.dm_db_stats_properties ADD StatsName NVARCHAR(128) NOT NULL CONSTRAINT df_dm_db_stats_properties_StatsName DEFAULT ('')

GO
CREATE TABLE dbo.dm_db_stats_histogram(
 dm_db_stats_histogramID INT IDENTITY(1,1) NOT NULL constraint PK_dm_db_stats_histogram PRIMARY KEY CLUSTERED,
 dm_db_stats_propertiesID INT NOT NULL, 
 object_id int NOT NULL,
 stats_id  int NOT NULL,
 step_number   int NOT NULL,
 range_high_key    sql_variant NOT NULL,
 range_rows    real    NOT NULL,
 equal_rows    real    NOT NULL,
 distinct_range_rows   bigint  NOT NULL,
 average_range_rows    REAL NOT NULL
)
go
ALTER TABLE dbo.dm_db_stats_histogram ADD CONSTRAINT fk_dm_db_stats_properties FOREIGN KEY(dm_db_stats_propertiesID) REFERENCES dbo.dm_db_stats_properties(dm_db_stats_propertiesID)
ALTER TABLE dbo.dm_db_stats_histogram ALTER COLUMN range_high_key SQL_VARIANT NULL
GO

這是我用來記錄統計數據的程式碼:

SET NOCOUNT ON
BEGIN TRY
 DROP TABLE #Stat_Header
END TRY
BEGIN CATCH
END CATCH
CREATE TABLE #Stat_Header (Name sysname, Updated DATETIME, Rows BIGINT, Rows_Sampled BIGINT, Steps SMALLINT, Density REAL, AverageKeyLength INT, StringIndex varchar(10)
, FilterExpression varchar(8000), unfiltered_rows bigint, persisted_sample_percent float)

BEGIN TRY
 DROP TABLE #Histogram
END TRY
BEGIN CATCH
END CATCH
CREATE TABLE #Histogram (Step_Number INT IDENTITY(1,1), range_high_key SQL_VARIANT, range_rows REAL NOT NULL, equal_rows    REAL NOT NULL, distinct_range_rows BIGINT NOT NULL, average_range_rows REAL NOT NULL)


DECLARE TableCursor CURSOR LOCAL STATIC FOR 
   SELECT t.name AS TableName, sc.name AS SchemaName
   FROM sys.tables t 
   INNER JOIN sys.schemas sc ON sc.schema_id = t.schema_id
   ORDER BY sc.name, t.name

DECLARE @sql NVARCHAR(MAX) = '', @TableName VARCHAR(100), @SchemaName VARCHAR(100), @loopCounter INT =0
SELECT GETDATE() AS StartDate
OPEN TableCursor
WHILE 1 =1 BEGIN
   FETCH TableCursor INTO @TableName, @SchemaName
   IF @@fetch_status <> 0 BREAK

   SELECT @sql = 'declare @Scope_Identity int = 0, @RowCount int
   SET NOCOUNT ON' + CHAR(13)
   SELECT @sql += '
   TRUNCATE TABLE #Stat_Header
   TRUNCATE TABLE #Histogram 
   INSERT INTO #Stat_Header(Name, Updated, Rows, Rows_Sampled, Steps, Density, AverageKeyLength, StringIndex, FilterExpression, unfiltered_rows/*, persisted_sample_percent*/)
   exec (''DBCC SHOW_STATISTICS ("' + @SchemaName + '.' + @TableName + '", "' + s.name +'") with STAT_HEADER'')
   INSERT INTO dbo.dm_db_stats_properties(databaseid, object_id, stats_id, last_updated, rows, rows_sampled, steps, unfiltered_rows, modification_counter, persisted_sample_percent, SampleDate, StatsName)
   SELECT db_id(), ' + LTRIM(t.object_id) + ', ' + LTRIM(s.stats_id) +', coalesce(sh.Updated, ''2000-01-01''), isnull(sh.rows,0), isnull(sh.Rows_Sampled,0), isnull(sh.steps,0), isnull(sh.unfiltered_rows,0), 0, sh.persisted_sample_percent, cast(''' + LTRIM(SYSUTCDATETIME()) + ''' as datetime2(7)), ''' + s.name + '''
   FROM #Stat_Header sh
   LEFT JOIN dbo.dm_db_stats_properties sp ON sp.object_id=' + LTRIM(t.object_id) + ' AND sp.stats_id=' + LTRIM(s.stats_id) + ' AND sh.Updated=sp.last_updated
   WHERE sp.dm_db_stats_propertiesID IS NULL
   SELECT @Scope_Identity = SCOPE_IDENTITY(), @RowCount=@@ROWCOUNT
   IF @RowCount>0 BEGIN 
       --raiserror (''here'', 10, 1) with nowait
       INSERT INTO #Histogram(range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows)
       exec (''DBCC SHOW_STATISTICS ("' + @SchemaName + '.' + @TableName + '", "' + s.name +'") with HISTOGRAM'')
       INSERT INTO dbo.dm_db_stats_histogram(dm_db_stats_propertiesID, object_id, stats_id, step_number, range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows)
       SELECT @Scope_Identity, ' + LTRIM(t.object_id) + ', ' + LTRIM(s.stats_id) +', h.Step_Number, h.range_high_key, h.range_rows, h.equal_rows, h.distinct_range_rows, h.average_range_rows
       FROM #Histogram h
   END 
   raiserror (''table = ' + @TableName + ', ' + s.name + ', rc= %i '', 10, 1, @RowCount) with nowait
   waitfor delay ''00:00:01''
   '
   FROM sys.stats AS s
   INNER JOIN sys.tables t ON t.object_id = s.object_id
   INNER JOIN sys.schemas sc ON sc.schema_id = t.schema_id
   WHERE t.name=@TableName
   AND sc.name = @SchemaName
   IF @loopCounter < 1 EXEC dbo.LongPrint @String=@sql
   SET @loopCounter +=1
   EXEC sp_executesql @sql
   --BREAK
END
DEALLOCATE TableCursor
SELECT GETDATE() AS StopDate

我還有一個在 SQL Server 2016+17 上使用新 DMV 的解決方案

exec sp_foreachdb @command = N'
use ?
DECLARE @SampleDate DATETIME2 = SYSUTCDATETIME()

 INSERT INTO master.dbo.dm_db_stats_properties(DatabaseID, object_id, stats_id, last_updated, rows, rows_sampled, steps, unfiltered_rows, modification_counter, persisted_sample_percent, SampleDate, StatsName)
 SELECT db_id() as DatabaseID, s.object_id, s.stats_id, sp.last_updated, sp.rows, sp.rows_sampled, sp.steps, sp.unfiltered_rows, sp.modification_counter, sp.persisted_sample_percent, @SampleDate, s.name
 FROM ?.sys.stats AS s
 INNER JOIN ?.sys.tables t ON t.object_id = s.object_id
 INNER JOIN ?.sys.schemas sc ON sc.schema_id = t.schema_id
 CROSS APPLY ?.sys.dm_db_stats_properties(s.object_id, s.stats_id) AS sp
 LEFT JOIN master.dbo.dm_db_stats_properties T1 ON T1.object_id = s.object_id AND T1.stats_id = s.stats_id AND T1.last_updated=sp.last_updated
 WHERE sp.last_updated IS NOT NULL
 AND T1.last_updated IS NULL
 select @@rowcount as r1

INSERT INTO master.dbo.dm_db_stats_histogram(dm_db_stats_propertiesID, object_id, stats_id, step_number, range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows)
SELECT sp.dm_db_stats_propertiesID, sp.object_id, sp.stats_id, hist.step_number, hist.range_high_key, hist.range_rows, hist.equal_rows, hist.distinct_range_rows, hist.average_range_rows
FROM master.dbo.dm_db_stats_properties sp
CROSS APPLY ?.sys.dm_db_stats_histogram(sp.[object_id], sp.stats_id) AS hist
WHERE sp.SampleDate = @SampleDate
 select @@rowcount as r2

', @exclude_list='tempdb, model', @print_dbname=1

我真正的問題

現在,我如何根據收集的數據編寫查詢,這將向我展示

a) 在我的採樣期間哪些表、索引、列(不)發生很大變化?

b) 哪些表、索引、列受益於 WITH FULLSCAN 命令?

b) 哪些表、索引、列受益於 WITH FULLSCAN 命令?

從我的角度來看,您沒有收集正確的數據來回答這個問題。如果您正在尋找僅通過分析數據庫統計資訊就可以做出的改進,我只能想到使用抽樣統計資訊而不是可能導致的兩個查詢性能問題FULLSCAN

  1. 密度至少下降了一個數量級。

某些數據分佈不適合 SQL Server 在將採樣數據轉換為直方圖時所做的某些假設。在這些情況下,您最終可能會得到 10 倍、100 倍甚至更多的密度。這可能會導致在統計對像中使用密度向量的查詢出現性能問題。

您可以通過保存所有採樣統計資訊的密度資訊、使用 FULLSCAN 收集所有相關列的統計資訊以及比較兩個結果集之間的密度來搜尋可能的問題。任何太不准確的東西都是從完整收集統計數據中受益的候選者。 2. 該查詢容易受到升序鍵問題的影響。

您已將 SQL Server 2008 列為標記,因此這仍然可能與您相關。考慮一個在插入行時儲存日期時間的列。如果您有在該列上過濾以查找最近數據的查詢,他們可能正在搜尋直方圖之外的數據。使用舊版 CE,您最終可能會得到非常低的基數估計,這可能會導致查詢性能問題。

這可以通過 FULLSCAN 統計數據來解決,儘管對我來說感覺有點矯枉過正。您可以使用相關數據類型(希望無需擔心 VARCHAR 的升序​​鍵)在所有統計資訊上完整收集統計資訊,並查看最大高鍵如何變化。

對於上述兩個問題,我想不出一種僅通過查看抽樣統計數據以程式方式找到它們的方法。這就是為什麼我說你沒有收集正確的數據來回答你的問題。

如果您關心我的意見,真正最小化統計維護的方法是分析性能不佳的查詢的工作負載,仔細分析根本原因以確定統計問題何時起作用,確定準確的統計類型問題,最後相應調整統計維護工作。

我自己的步驟

我在這方面花了很多時間,但到目前為止最有用的是一個查詢,它向我顯示了步驟數差異最大的表/列/索引:

drop table /* if exists */ #inv
create table #inv(dm_db_stats_propertiesID int, DatabaseID int, object_id int, stats_id int, Steps varchar(100), dm_db_stats_propertiesIDs varchar(400)
, DatabaseName sysname, StatsName varchar(100) null, TableName varchar(100) null, ColumnName varchar(100) null, minSteps int, maxSteps int)

insert into #inv( dm_db_stats_propertiesID, DatabaseID, object_id, stats_id, Steps, dm_db_stats_propertiesIDs, DatabaseName, StatsName, minSteps, maxSteps)
SELECT dm_db_stats_propertiesID, DatabaseID, object_id, stats_id, Steps, dm_db_stats_propertiesIDs, a.Name as DatabaseName, StatsName, minSteps, maxSteps
from (
   SELECT top (10000) L.Name, sp.*
   , stuff((select ', ' +ltrim(steps) from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id for xml path('')),1,2,'') as Steps2
   , stuff((select ', ' + ltrim(dm_db_stats_propertiesID) from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id for xml path('')),1,2,'') as dm_db_stats_propertiesIDs
   , (select count(*) as rowcnt from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id ) as rowcnt
   , (select min(Steps) as minSteps from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id ) as minSteps
   , (select max(Steps) as maxSteps from dbo.dm_db_stats_properties s2 where s2.DatabaseID=sp.DatabaseID and s2.object_id = sp.object_id and s2.stats_id=sp.stats_id ) as maxSteps
   FROM dbo.dm_db_stats_properties sp
   inner join #List L on sp.DatabaseID=L.DatabaseID and sp.SampleDate>=L.SampleDate
) as a
where a.Steps2 like '%,%' --',%'

exec sp_foreachdb 'update #inv set TableName = t.name /* select * */ from #inv i inner join ?.sys.tables t on i.object_id = t.object_id and (''['' + i.DatabaseName + '']'') = ''?'''

exec sp_foreachdb 'update #inv set ColumnName = c.name from #inv i
inner join ?.sys.columns c on i.object_id=c.object_id and c.column_id = convert(int, convert(varbinary, SUBSTRING(i.StatsName, 9, 8),2)) and (''['' + i.DatabaseName + '']'') = ''?''
where i.StatsName like ''_WA_Sys%''
'

select * , (0.0+maxSteps-minSteps)/maxSteps * 100  as PctChange
from #inv
where minSteps <> maxSteps
and DatabaseName <> 'master'
AND TableName NOT LIKE 'dm_db_stats%'
order by PctChange desc

幾乎不是我所希望的。

引用自:https://dba.stackexchange.com/questions/226800