Backup

是否可以複製 RedShift 數據庫?

  • May 28, 2021

對於一些測試和開發工作,我們希望能夠定期複製現有的 RedShift 數據庫(所有模式和所有數據)。類似於備份恢復的東西。

我能找到的最好的方法是從創建整個集群的快照開始,這在此時是不切實際的。

我們確實有用於創建數據庫和架構的腳本,但我們沒有一個簡單/整潔的機制,可以從另一個 AWS Redshift 數據庫填充這些表。(我們發現不能使用 INSERT INTO db_new.schema.table SELECT * FROM db_old.schema.table,因為不支持跨數據庫語法)

有沒有人有什麼建議?

您可以執行以下操作:

對於開發規模的數據集,這種方式(UNLOAD/COPY:先用 UNLOAD 匯出,再用 COPY 載入)會相當快。

我剛花了很多時間做這件事。問題在於 postgres 工具不會擷取 sort 鍵和 dist 鍵的資訊。稍後我會將完整的腳本發佈到我們的公共 github 儲存庫,但現在這裡先提供一個腳本,它會把所有表定義轉成 create table 語句。它還沒有處理列壓縮類型或主鍵,但至少讓我們可以在另一個數據庫中重新創建這些表。填充這些表只需要用腳本產生 UNLOAD(匯出)和 COPY(載入)語句即可。由於 Redshift SQL 的許多限制,這個腳本並不漂亮,而且我已經花了太多時間在上面,但它執行良好。

-- Generates one "create table" statement per user table in the current database
-- by reading the catalogs (pg_class, pg_namespace, pg_attribute) and
-- information_schema.columns.
--
-- Output columns:
--   1. schema-qualified table name
--   2. DDL text: column definitions (type, character length, default,
--      nullability, identity), then an optional distkey(...) and sortkey(...)
--
-- Known gaps (not emitted): primary keys, column compression encodings, and type
-- modifiers other than character length (e.g. numeric precision/scale).
--
-- Changes relative to the original version of this query:
--   * The ')' that closes the column list used to live only inside the distkey
--     text, so a table WITHOUT a dist key produced DDL with an unclosed column
--     list. The fallback for a missing dist key is now ')'.
--   * Only ordinary tables (pg_class.relkind = 'r') are listed, so views are no
--     longer emitted as "create table" statements.
select tm.schemaname||'.'||tm.tablename, 'create table '||tm.schemaname||'.'||tm.tablename
 ||' ('
 ||cp.coldef
 -- primary key (not implemented; would be appended here, before the closing paren)
 -- dist key: d.distkey already starts with the ')' that closes the column list,
 -- so when the table has no dist key we must still emit that ')'
 || nvl(d.distkey,')')
 --sort key 
 -- Builds ' sortkey(col1,col2,...)' ordered by sort-key position. Every column is
 -- emitted with a leading ',' and the first one is stripped by substr(...,2,10000).
 -- NOTE(review): this relies on the array()/array_to_string() result being NULL
 -- when the table has no sort columns, so that nvl() yields '' -- confirm on the
 -- target cluster.
 || nvl((select 
        ' sortkey(' ||substr(array_to_string(
                    array( select ','||cast(column_name as varchar(100))  as str from
                          (select column_name from information_schema.columns col where  col.table_schema= tm.schemaname and col.table_name=tm.tablename) c2
                           join 
                           (-- gives sort cols
                             select attrelid as tableid, attname as colname, attsortkeyord as sort_col_order from pg_attribute pa where 
                             pa.attnum > 0  AND NOT pa.attisdropped AND pa.attsortkeyord > 0
                           ) st on tm.tableid=st.tableid and c2.column_name=st.colname   order by sort_col_order
                         )
                   ,'')
                 ,2,10000) || ')'
  )
 ,'') ||';'
 from 
-- tm  master table list: one row per ordinary table outside the system schemas
(
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid 
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace 
 AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
 -- ordinary tables only; without this, views would be scripted as tables
 AND c.relkind = 'r'
) tm 
-- cp  creates the col params for the create string
-- Each column fragment is prefixed with 'QQQ'<tableid>'ZZZ' so that, after all
-- fragments of a table are concatenated into one string, the table id can be
-- recovered from between the first QQQ/ZZZ pair and the markers stripped again.
join
(select 
 -- text between the first 'QQQ' and the first 'ZZZ' = the table id
 substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)) as tableid
 -- remove every 'ZZZ' and every 'QQQ'<tableid>, then drop the leading ','
 ,substr(replace(replace(str,'ZZZ',''),'QQQ'||substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)),''),2,10000) as coldef
from
( select array_to_string(array(
 -- one fragment per column: marker, ',name type', optional '(char length)'
 SELECT  'QQQ'||cast(t.tableid as varchar(10))||'ZZZ'|| ','||column_name||' '|| decode(udt_name,'bpchar','char',udt_name) || decode(character_maximum_length,null,'', '('||cast(character_maximum_length as varchar(9))||')'   )
 -- default: skipped when there is none or when the default is an identity expression
 || decode(substr(column_default,2,8),'identity','',null,'',' default '||column_default||' ')
 -- nullable
 || decode(is_nullable,'YES',' NULL ','NO',' NOT NULL ') 
 -- identity: emits identity(<text between the first and last single quote of column_default>)
 -- NOTE(review): presumably that quoted text is 'seed,step' -- confirm against a real identity column
 || decode(substr(column_default,2,8),'identity',' identity('||substr(column_default,(charindex('''',column_default)+1), (length(column_default)-charindex('''',reverse(column_default))-charindex('''',column_default)   ) )  ||') ', '') as str 
  from  
 -- ci  all the col info for the current table t, in ordinal order
 (
 select cast(t.tableid as int), cast(table_schema as varchar(100)), cast(table_name as varchar(100)), cast(column_name as varchar(100)), 
 cast(ordinal_position as int), cast(column_default as varchar(100)), cast(is_nullable as varchar(20)) , cast(udt_name as varchar(50))  ,cast(character_maximum_length as int),
  sort_col_order  , decode(d.colname,null,0,1) dist_key 
   from (select * from information_schema.columns c where  c.table_schema= t.schemaname and c.table_name=t.tablename) c
 left join 
 (-- gives sort cols
 select attrelid as tableid, attname as colname, attsortkeyord as sort_col_order from pg_attribute a where 
  a.attnum > 0  AND NOT a.attisdropped AND a.attsortkeyord > 0
 ) s on t.tableid=s.tableid and c.column_name=s.colname
 left join 
 -- gives dist col
 (select attrelid as tableid, attname as colname from pg_attribute a where
  a.attnum > 0 AND NOT a.attisdropped  AND a.attisdistkey = 't'
 ) d on t.tableid=d.tableid and c.column_name=d.colname
 order by ordinal_position
 ) ci 
 -- for the working array funct
 ), '') as str
from 
(-- need tableid
SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid 
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace 
  AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
  -- same table filter as the master list tm
  AND c.relkind = 'r'
) t 
)) cp on tm.tableid=cp.tableid
-- add in primary key query here
-- dist key: left join, because a table may have no dist key column
left join
(  select 
 -- close off the col defs after the primary key 
 ')' ||
 ' distkey('|| cast(column_name as varchar(100)) ||')'  as distkey, t.tableid
 from information_schema.columns c
 join 
 (-- need tableid
 SELECT substring(n.nspname,1,100) as schemaname, substring(c.relname,1,100) as tablename, c.oid as tableid 
 FROM pg_namespace n, pg_class c
 WHERE n.oid = c.relnamespace 
   AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema')
   -- same table filter as the master list tm
   AND c.relkind = 'r'
 ) t on c.table_schema= t.schemaname and c.table_name=t.tablename
 join 
 -- gives dist col
 (select attrelid as tableid, attname as colname from pg_attribute a where
  a.attnum > 0 AND NOT a.attisdropped  AND a.attisdistkey = 't'
 ) d on t.tableid=d.tableid and c.column_name=d.colname
) d on tm.tableid=d.tableid 

引用自:https://dba.stackexchange.com/questions/55228