Slowly Changing Dimension Type II: My Simple Personal Alternative To Merge
I've been working with the SQL Server's merge statement for some time now. I truly believe it's a great feature. Alas, the world out there is not so perfect. You'll soon run into a few hurdles, sooner than you know it. They can range from cross server limitations, deadlocks due to performance degradation. The list goes on and on...
Google: sql server merge statement limitations
I found myself having to come up with a custom solution. yes, a lot has been written on SCD Type II. Reinvent the wheel much???
I will create two tables...
1. dbo.customer table. transactional table
2. dbo.dim_customer table dimension table
create table dbo.customer
(
customer_id int identity(1,1) not null,
customer_name varchar(50) null,
city varchar(50) null,
favorite_car varchar(50) null
)
create table dbo.dim_customer
(
customer_id int null,
customer_name varchar(50) null,
city varchar(50) null,
favorite_car varchar(50) null,
row_is_current int not null,
row_effective_date date not null,
row_expire_date date not null
)
1.
truncate table dbo.customer
dbcc checkident ('dbo.customer', reseed,1)
;
with
cte_customers as
(
select 'john' customer_name,'cape town' city,'jaguar' favorite_car union all
select 'paul' customer_name,'johannesburg' city,null union all
select 'peter' customer_name,'pretoria' city,null union all
select 'matthew' customer_name,'durban' city,null
)
insert into dbo.customer(customer_name,city,favorite_car)
select customer_name,city,favorite_car
from cte_customers
select * from dbo.customer
[img1]
declare @not_expired datetime
declare @load_time datetime
set @not_expired = cast('99991231' as date)
set @load_time = cast(getdate() as date)
update t set
t.row_is_current = -1,
t.row_expire_date = @load_time
from dbo.customer s
inner join dbo.dim_customer t on s.customer_id = t.customer_id
and t.row_is_current = 1 and t.row_expire_date = @not_expired
where t.city <> s.city
or t.favorite_car <> s.favorite_car
/*insert changes where row_is_current is -1*/
insert into dbo.dim_customer
select s.*,1 row_is_current,@load_time row_effective_date,@not_expired row_expire_date
from dbo.customer s
inner join dbo.dim_customer t on s.customer_id = t.customer_id
where t.row_is_current = -1 and t.row_expire_date = @load_time
/*update changes set row_is_current to 0 where row_is_current is -1*/
update t set
t.row_is_current = 0,
t.row_expire_date = @load_time
from dbo.dim_customer t
where t.row_is_current = -1 and t.row_expire_date = @load_time
/*new*/
insert into dbo.dim_customer
select s.*,1 row_is_current,@load_time row_effective_date,@not_expired row_expire_date
from dbo.customer s
left join dbo.dim_customer t on s.customer_id = t.customer_id
where t.customer_id is null
/*
deleted rows, how does everyone deal with these anyways
*/
;
with
cte_deleted_records as
(
select t.customer_id,row_is_current
from dbo.customer s
right join dbo.dim_customer t on s.customer_id = t.customer_id
where s.customer_id is null and row_is_current = 1
)
update dels
set dels.row_is_current = -999
--row_expire_date = @load_time /*maybe optional*/
from cte_deleted_records dels
select * from dbo.customer
select * from dbo.dim_customer
--truncate table dbo.dim_customer
[img2]
update dbo.customer
set favorite_car = 'jaguar'
where customer_id = 1
update dbo.customer
set favorite_car = 'mazda',
city = 'bloemfontein'
where customer_id = 2
insert into dbo.customer(customer_name,city,favorite_car)
values ('mary','port elizabeth','ford')
select * from dbo.customer
[img4]
Google: sql server merge statement limitations
I found myself having to come up with a custom solution. yes, a lot has been written on SCD Type II. Reinvent the wheel much???
I will create two tables...
1. dbo.customer table. transactional table
2. dbo.dim_customer table dimension table
create table dbo.customer
(
customer_id int identity(1,1) not null,
customer_name varchar(50) null,
city varchar(50) null,
favorite_car varchar(50) null
)
(
customer_id int null,
customer_name varchar(50) null,
city varchar(50) null,
favorite_car varchar(50) null,
row_is_current int not null,
row_effective_date date not null,
row_expire_date date not null
)
1.
truncate table dbo.customer
dbcc checkident ('dbo.customer', reseed,1)
;
with
cte_customers as
(
select 'john' customer_name,'cape town' city,'jaguar' favorite_car union all
select 'paul' customer_name,'johannesburg' city,null union all
select 'peter' customer_name,'pretoria' city,null union all
select 'matthew' customer_name,'durban' city,null
)
insert into dbo.customer(customer_name,city,favorite_car)
select customer_name,city,favorite_car
from cte_customers
select * from dbo.customer
[img1]
declare @not_expired datetime
declare @load_time datetime
set @not_expired = cast('99991231' as date)
set @load_time = cast(getdate() as date)
update t set
t.row_is_current = -1,
t.row_expire_date = @load_time
from dbo.customer s
inner join dbo.dim_customer t on s.customer_id = t.customer_id
and t.row_is_current = 1 and t.row_expire_date = @not_expired
where t.city <> s.city
or t.favorite_car <> s.favorite_car
/*insert changes where row_is_current is -1*/
insert into dbo.dim_customer
select s.*,1 row_is_current,@load_time row_effective_date,@not_expired row_expire_date
from dbo.customer s
inner join dbo.dim_customer t on s.customer_id = t.customer_id
where t.row_is_current = -1 and t.row_expire_date = @load_time
/*update changes set row_is_current to 0 where row_is_current is -1*/
update t set
t.row_is_current = 0,
t.row_expire_date = @load_time
from dbo.dim_customer t
where t.row_is_current = -1 and t.row_expire_date = @load_time
/*new*/
insert into dbo.dim_customer
select s.*,1 row_is_current,@load_time row_effective_date,@not_expired row_expire_date
from dbo.customer s
left join dbo.dim_customer t on s.customer_id = t.customer_id
where t.customer_id is null
/*
deleted rows, how does everyone deal with these anyways
*/
;
with
cte_deleted_records as
(
select t.customer_id,row_is_current
from dbo.customer s
right join dbo.dim_customer t on s.customer_id = t.customer_id
where s.customer_id is null and row_is_current = 1
)
update dels
set dels.row_is_current = -999
--row_expire_date = @load_time /*maybe optional*/
from cte_deleted_records dels
select * from dbo.customer
select * from dbo.dim_customer
--truncate table dbo.dim_customer
[img2]
update dbo.customer
set favorite_car = 'jaguar'
where customer_id = 1
update dbo.customer
set favorite_car = 'mazda',
city = 'bloemfontein'
where customer_id = 2
insert into dbo.customer(customer_name,city,favorite_car)
values ('mary','port elizabeth','ford')
select * from dbo.customer
delete customer where customer_id = 1
[img3][img4]
Comments
Post a Comment