Monday, February 29, 2016

Slowly Changing Dimension Type II: My Simple Personal Alternative To Merge

I've been working with the SQL Server's merge statement for some time now. I truly believe it's a great feature. Alas, the world out there is not so perfect. You'll soon run into a few hurdles, sooner than you know it. They can range from cross server limitations, deadlocks due to performance degradation. The list goes on and on...
Google: sql server merge statement limitations

I found myself having to come up with a custom solution. yes, a lot has been written on SCD Type II. Reinvent the wheel much???

I will create two tables...
1. dbo.customer table. transactional table
2. dbo.dim_customer table dimension table

create table dbo.customer
(
customer_id int identity(1,1) not null,
customer_name varchar(50) null,
city varchar(50) null,
favorite_car varchar(50) null
)

create table dbo.dim_customer
(
customer_id int null,
customer_name varchar(50) null,
city varchar(50) null,
favorite_car varchar(50) null,
row_is_current int not null,
row_effective_date date not null,
row_expire_date date not null
)


1.
truncate table dbo.customer
dbcc checkident ('dbo.customer', reseed,1)
;
with
cte_customers as
(
select 'john' customer_name,'cape town' city,'jaguar' favorite_car union all
select 'paul' customer_name,'johannesburg' city,null union all
select 'peter' customer_name,'pretoria' city,null union all
select 'matthew' customer_name,'durban' city,null
)
insert into dbo.customer(customer_name,city,favorite_car)
select customer_name,city,favorite_car
from cte_customers

select * from dbo.customer
 [img1]

declare @not_expired datetime
declare @load_time datetime
set  @not_expired = cast('99991231' as date)
set  @load_time = cast(getdate() as date)

update t set
  t.row_is_current = -1,
  t.row_expire_date = @load_time
from dbo.customer s
  inner join dbo.dim_customer t on s.customer_id = t.customer_id
  and t.row_is_current = 1 and t.row_expire_date = @not_expired
where t.city <> s.city
  or t.favorite_car <> s.favorite_car


/*insert changes where row_is_current is -1*/
insert into dbo.dim_customer
select s.*,1 row_is_current,@load_time row_effective_date,@not_expired row_expire_date
from dbo.customer s
  inner join dbo.dim_customer t on s.customer_id = t.customer_id
where t.row_is_current = -1 and t.row_expire_date = @load_time

/*update changes set row_is_current to 0 where row_is_current is -1*/
update t set
  t.row_is_current = 0,
  t.row_expire_date = @load_time
from dbo.dim_customer t
where t.row_is_current = -1 and t.row_expire_date = @load_time

/*new*/
insert into dbo.dim_customer
select s.*,1 row_is_current,@load_time row_effective_date,@not_expired row_expire_date
from dbo.customer s
  left join dbo.dim_customer t on s.customer_id = t.customer_id
where t.customer_id is null

/*
deleted rows, how does everyone deal with these anyways
*/
;
with
cte_deleted_records as
(
select t.customer_id,row_is_current
from dbo.customer s
right join dbo.dim_customer t on s.customer_id = t.customer_id
where s.customer_id is null and row_is_current = 1
)
update dels
set dels.row_is_current = -999
--row_expire_date = @load_time /*maybe optional*/
from cte_deleted_records dels

select * from dbo.customer
select * from dbo.dim_customer
--truncate table dbo.dim_customer

[img2]


update dbo.customer
set  favorite_car = 'jaguar'
where customer_id = 1

update dbo.customer
set  favorite_car = 'mazda',
  city = 'bloemfontein'
where customer_id = 2

insert into dbo.customer(customer_name,city,favorite_car)
  values ('mary','port elizabeth','ford')

select * from dbo.customer
delete customer where customer_id = 1
[img3]
[img4]