Sql-Server

使用析取條件加速復雜連接

  • September 26, 2019

我有一個表格,其中包含有關使用者訂單及其電話號碼的資訊。該表如下所示:

table Orders (
   OrderID int,
   UserName varchar(50),
   PhoneNumber1 varchar(50),
   PhoneNumber2 varchar(50),
   PhoneNumber3 varchar(50)
)

問題:固定一個使用者u,統計至少共享一個電話號碼的不同使用者的數量u。換句話說,計算v滿足以下一個或多個條件的不同使用者的數量:

  1. u.PhoneNumber1 = v.PhoneNumber1
  2. u.PhoneNumber2 = v.PhoneNumber2
  3. u.PhoneNumber3 = v.PhoneNumber3

我的解決方案(OrderID 是主鍵,我在group by子句中添加了其他列只是為了在結果中顯示它們):

select o1.OrderID, 
      o1.PhoneNumber1, 
      o1.PhoneNumber2, 
      o1.PhoneNumber3, 
      count(distinct o2.UserName)
from Orders o1
inner join Orders o2
   on o1.PhoneNumber1 = o2.PhoneNumber1
   or o1.PhoneNumber2 = o2.PhoneNumber2
   or o1.PhoneNumber3 = o2.PhoneNumber3
group by o1.OrderID, o1.PhoneNumber1, o1.PhoneNumber2, o1.PhoneNumber3

Orders表包含大約 300K 條記錄,而我的查詢需要太多時間才能完成(我估計大約 3 小時,但我沒有讓它完全完成)。

我可以採用哪些策略來加快計算速度?例如,是否存在產生相同結果的等效語句?

注意:我嘗試在列上創建三個非聚集索引,PhoneNumber但沒有註意到顯著的改進。

下面使用 3 個隨機“電話”號碼創建 300000 個虛擬訂單,然後找到匹配項,應該可以幫助您入門。

use tempdb
GO

drop table if EXISTS Orders 
GO


create table Orders (
   OrderID int primary key,
   UserName varchar(50),
   PhoneNumber1 varchar(50),
   PhoneNumber2 varchar(50),
   PhoneNumber3 varchar(50)
)

-- generate 300000 with randon "phone" numbers

;WITH TallyTable AS (
SELECT TOP 300000 ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS [N]
 FROM dbo.syscolumns tb1,dbo.syscolumns tb2 
)
insert into Orders
select n, 'user' + cast(n as varchar(10)), cast(CRYPT_GEN_RANDOM(3) as int), cast(CRYPT_GEN_RANDOM(3) as int), cast(CRYPT_GEN_RANDOM(3) as int)
FROM TallyTable;

/*

-- original query, takes a long time

select o1.OrderID, 
      o1.PhoneNumber1, 
      o1.PhoneNumber2, 
      o1.PhoneNumber3, 
      count(distinct o2.UserName)
from Orders o1
inner join Orders o2
   on o1.PhoneNumber1 = o2.PhoneNumber1
   or o1.PhoneNumber2 = o2.PhoneNumber2
   or o1.PhoneNumber3 = o2.PhoneNumber3
group by o1.OrderID, o1.PhoneNumber1, o1.PhoneNumber2, o1.PhoneNumber3
*/

-- which users share the same phonenumber1
select o1.UserName as username, 'phonenumber1' as phonenumber, o2.UserName as sharedwith
from Orders o1
inner join Orders o2
   on o1.PhoneNumber1 = o2.PhoneNumber1
   and o1.UserName <> o2.username -- make sure it's a different user
union ALL
-- which orders share the same phonenumber2
select o1.UserName, 'phonenumber2',  o2.UserName
from Orders o1
inner join Orders o2
   on o1.PhoneNumber2 = o2.PhoneNumber2
   and o1.UserName <> o2.username -- make sure it's a different user
union ALL
-- which orders share the same phonenumber3
select o1.UserName, 'phonenumber3',  o2.UserName
from Orders o1
inner join Orders o2
   on o1.PhoneNumber3 = o2.PhoneNumber3
   and o1.UserName <> o2.username -- make sure it's a different user
order by username asc


-- use the above as a cte to do some counting
; with matches as (
   select o1.UserName as username, 'phonenumber1' as phonenumber, o2.UserName as sharedwith
   from Orders o1
   inner join Orders o2
       on o1.PhoneNumber1 = o2.PhoneNumber1
       and o1.UserName <> o2.username -- make sure it's a different user
   union ALL
   -- which orders share the same phonenumber2
   select o1.UserName, 'phonenumber2',  o2.UserName
   from Orders o1
   inner join Orders o2
       on o1.PhoneNumber2 = o2.PhoneNumber2
       and o1.UserName <> o2.username -- make sure it's a different user
   union ALL
   -- which orders share the same phonenumber3
   select o1.UserName, 'phonenumber3',  o2.UserName
   from Orders o1
   inner join Orders o2
       on o1.PhoneNumber3 = o2.PhoneNumber3
       and o1.UserName <> o2.username -- make sure it's a different user
)
SELECT matches.username, COUNT(*) AS matches
from matches
group by username
order by 2 desc

引用自:https://dba.stackexchange.com/questions/249680