Backup
是否可以複製 RedShift 數據庫?
對於一些測試和開發工作,我們希望能夠定期複製現有的 RedShift 數據庫(所有模式和所有數據)。類似於備份恢復的東西。
我能找到的最好的方法是從創建整個集群的快照開始,這在此時是不切實際的。
我們確實有用於創建數據庫和架構的腳本,但我們沒有用於從另一個 AWS RedShift 數據庫填充表的簡單/整潔的機制。 (我們發現不能使用
INSERT INTO db_new.schema.table SELECT * FROM db_old.schema.table
,因為不支持跨數據庫語法)有沒有人有什麼建議?
您可以執行以下操作:
1. 使用 UNLOAD 將您的數據匯出到 S3 ( http://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD.html )
2. 針對新數據庫執行部署腳本
3. 使用 COPY 將數據導入新數據庫 ( http://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html )

對於開發規模的數據集,此(UNLOAD/COPY)流程將相當快。
我剛花了很多時間處理這件事。問題在於 postgres 工具不會擷取 sort 鍵和 dist 鍵的資訊。稍後我會將完整的腳本發佈到我們的公共 GitHub 儲存庫,但現在先提供一個腳本,它會把所有表定義轉成 CREATE TABLE 語句。它還沒有處理列壓縮類型或主鍵,但至少能讓我們在另一個數據庫中重新創建表。之後填充這些表,只需用簡單的腳本產生 UNLOAD 和載入(COPY)語句即可。由於 Redshift SQL 的諸多限制,該腳本並不漂亮,我也花了太多時間在上面,但它執行良好。
-- Generates one "create table" statement per relation found outside the
-- system schemas, built from catalog metadata only (pg_namespace, pg_class,
-- pg_attribute, information_schema.columns).
--
-- Output columns:
--   1. schema-qualified table name
--   2. DDL text: column list (type, length, default, NULL / NOT NULL,
--      identity), the closing ')', optional distkey(...), optional
--      sortkey(...), and a terminating ';'
--
-- Not handled: column compression encodings and primary keys (the places
-- where they would go are marked below).
--
-- NOTE(review): relies on nvl, decode, charindex and
-- array()/array_to_string(); presumably intended for Amazon Redshift --
-- confirm these are available where the script runs.
-- NOTE(review): the table lists are not filtered by relation kind, so any
-- non-table relation that has rows in information_schema.columns
-- (views? -- TODO confirm) would also get a "create table" statement.
select
    tm.schemaname||'.'||tm.tablename,
    'create table '||tm.schemaname||'.'||tm.tablename
    ||' ('
    ||cp.coldef
    -- primary key (not implemented)
    -- dist key: d.distkey already begins with the ')' that closes the column
    -- list, so a table without a distribution key must still get a bare ')'.
    || nvl(d.distkey,')')
    -- sort key: every sort column is emitted as ',name' in sort order and
    -- substr(...,2,10000) drops the leading comma. nvl() falls back to ''
    -- when the subquery yields NULL (presumably the case for tables with no
    -- sort columns -- TODO confirm).
    || nvl((
        select ' sortkey('
            ||substr(array_to_string(
                array(
                    select ','||cast(column_name as varchar(100)) as str
                    from (
                        select column_name
                        from information_schema.columns col
                        where col.table_schema= tm.schemaname
                          and col.table_name=tm.tablename
                    ) c2
                    join (
                        -- gives sort cols
                        select
                            attrelid as tableid,
                            attname as colname,
                            attsortkeyord as sort_col_order
                        from pg_attribute pa
                        where pa.attnum > 0
                          and not pa.attisdropped
                          and pa.attsortkeyord > 0
                    ) st
                        on tm.tableid=st.tableid
                       and c2.column_name=st.colname
                    order by sort_col_order
                )
            ,'')
            ,2,10000)
            || ')'
    ) ,'')
    ||';'
from
    -- tm: master table list
    (
        select
            substring(n.nspname,1,100) as schemaname,
            substring(c.relname,1,100) as tablename,
            c.oid as tableid
        from pg_namespace n, pg_class c
        where n.oid = c.relnamespace
          and nspname not in ('pg_catalog', 'pg_toast', 'information_schema')
    ) tm
    -- cp: creates the col params for the create string.
    -- Each column fragment is tagged 'QQQ<tableid>ZZZ' so that, after all
    -- fragments of a table are concatenated into one string, the table id can
    -- be cut back out (text between the first QQQ and ZZZ) and the tags
    -- stripped; the final substr(...,2,10000) drops the leading comma.
    join (
        select
            substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)) as tableid,
            substr(replace(replace(str,'ZZZ',''),'QQQ'||substr(str,(charindex('QQQ',str)+3),(charindex('ZZZ',str))-(charindex('QQQ',str)+3)),''),2,10000) as coldef
        from (
            select array_to_string(array(
                select 'QQQ'||cast(t.tableid as varchar(10))||'ZZZ'
                    || ','||column_name||' '
                    -- type name ('bpchar' is spelled 'char') and optional length
                    || decode(udt_name,'bpchar','char',udt_name)
                    || decode(character_maximum_length,null,'', '('||cast(character_maximum_length as varchar(9))||')' )
                    -- default (skipped when the default text is an identity marker or null)
                    || decode(substr(column_default,2,8),'identity','',null,'',' default '||column_default||' ')
                    -- nullable
                    || decode(is_nullable,'YES',' NULL ','NO',' NOT NULL ')
                    -- identity: re-emit the text found between the first and last single quote
                    || decode(substr(column_default,2,8),'identity',' identity('||substr(column_default,(charindex('''',column_default)+1), (length(column_default)-charindex('''',reverse(column_default))-charindex('''',column_default) ) ) ||') ', '') as str
                from
                    -- ci: all the col info, ordered by column position
                    (
                        select
                            cast(t.tableid as int),
                            cast(table_schema as varchar(100)),
                            cast(table_name as varchar(100)),
                            cast(column_name as varchar(100)),
                            cast(ordinal_position as int),
                            cast(column_default as varchar(100)),
                            cast(is_nullable as varchar(20)),
                            cast(udt_name as varchar(50)),
                            cast(character_maximum_length as int),
                            sort_col_order,
                            decode(d.colname,null,0,1) dist_key
                        from (
                            select *
                            from information_schema.columns c
                            where c.table_schema= t.schemaname
                              and c.table_name=t.tablename
                        ) c
                        left join (
                            -- gives sort cols
                            select
                                attrelid as tableid,
                                attname as colname,
                                attsortkeyord as sort_col_order
                            from pg_attribute a
                            where a.attnum > 0
                              and not a.attisdropped
                              and a.attsortkeyord > 0
                        ) s
                            on t.tableid=s.tableid
                           and c.column_name=s.colname
                        left join (
                            -- gives dist col
                            select
                                attrelid as tableid,
                                attname as colname
                            from pg_attribute a
                            where a.attnum > 0
                              and not a.attisdropped
                              and a.attisdistkey = 't'
                        ) d
                            on t.tableid=d.tableid
                           and c.column_name=d.colname
                        order by ordinal_position
                    ) ci
                -- for the working array funct
            ), '') as str
            from (
                -- need tableid
                select
                    substring(n.nspname,1,100) as schemaname,
                    substring(c.relname,1,100) as tablename,
                    c.oid as tableid
                from pg_namespace n, pg_class c
                where n.oid = c.relnamespace
                  and nspname not in ('pg_catalog', 'pg_toast', 'information_schema')
            ) t
        ) col_strings  -- explicit alias: derived tables must be named on PostgreSQL-style parsers
    ) cp
        on tm.tableid=cp.tableid
    -- add in primary key query here
    -- dist key
    left join (
        select
            -- close off the col defs after the primary key
            ')' || ' distkey('|| cast(column_name as varchar(100)) ||')' as distkey,
            t.tableid
        from information_schema.columns c
        join (
            -- need tableid
            select
                substring(n.nspname,1,100) as schemaname,
                substring(c.relname,1,100) as tablename,
                c.oid as tableid
            from pg_namespace n, pg_class c
            where n.oid = c.relnamespace
              and nspname not in ('pg_catalog', 'pg_toast', 'information_schema')
        ) t
            on c.table_schema= t.schemaname
           and c.table_name=t.tablename
        join (
            -- gives dist col
            select
                attrelid as tableid,
                attname as colname
            from pg_attribute a
            where a.attnum > 0
              and not a.attisdropped
              and a.attisdistkey = 't'
        ) d
            on t.tableid=d.tableid
           and c.column_name=d.colname
    ) d
        on tm.tableid=d.tableid