Tuesday, March 11, 2014

Compare Data from Two Tables

There comes a time when you need to compare data from two structurally identical tables. When that time comes, you can either go the Excel route (I respect Excel), ask your boss to lend you the company credit card and purchase commercial package from the likes of RedGate®, or you could just code the darn thing.

I must say however, that I've looked at many different options, I've tried SSIS, SQL Server Data Tools, RedGate® Data Compare, and SQL Server's less known about tablediff.exe. None of these actually gave me the results I needed, or I just wasn't happy with the output format.

I found some nifty little piece of code written by Roman Pekar and this sort of made the job easier. I improved it a little and added a little bit of dynamism to the script. Now you can pass any two identical tables and let SQL compare the data for you.

Without further ado, here is the code...Please feel free to improve it. I tried to comment it as much as I could.
alter procedure table_compare
@first_table varchar(20),
@second_table varchar(20),
@id_column varchar(20),
@column_list varchar(max) = null
as
/********************************************************************************************************************************
*********************************************************************************************************************************
*********************************************************************************************************************************
****** Author: Thato Mantai
****** Purpose: Compare data from two identical tables
****** Date: 11 March 2014
****** Version: 1.0
****** Credits: Roman Pekar (http://careers.stackoverflow.com/romanpekar , http://ru.linkedin.com/in/romanpekar/)
*********************************************************************************************************************************
*********************************************************************************************************************************
*********************************************************************************************************************************/
declare @xml_first_table xml   --xml structure to hold data from the first table
declare @xml_second_table xml   --xml structure to hold data from the second table
declare @sql_all_data varchar(4000)  --dynamic sql for storing unique records from both tables
declare @first_table_columns varchar(500) --stores all columns from first table
declare @second_table_columns varchar(500) --stores all columns from second table
declare @err_table_not_found varchar(200) set @err_table_not_found = 'Sorry, I can''t find specified table, "[plchldr_table_name]" in the current database.'
declare @err_key_column_not_found varchar(200) set @err_key_column_not_found = 'Sorry, I can''t find specified key column, "'+@id_column+'" in table "[plchldr_table_name]".'

if object_id(@first_table, 'u') is null  begin  set @err_table_not_found = replace(@err_table_not_found,'[plchldr_table_name]',@first_table)  raiserror(@err_table_not_found,18,1)  return - 1 end
if object_id(@second_table, 'u') is null  begin  set @err_table_not_found = replace(@err_table_not_found,'[plchldr_table_name]',@second_table)  raiserror(@err_table_not_found,18,1)  return - 1 end

if not exists(select 1 from sys.columns where object_name(object_id) = @first_table and name = @id_column)  begin   set @err_key_column_not_found = replace(@err_key_column_not_found,'[plchldr_table_name]',@first_table)   raiserror(@err_key_column_not_found,18,1)   return - 1  end
if not exists(select 1 from sys.columns where object_name(object_id) = @second_table and name = @id_column)  begin   set @err_key_column_not_found = replace(@err_key_column_not_found,'[plchldr_table_name]',@second_table)   raiserror(@err_key_column_not_found,18,1)   return - 1  end

/*let's start with a bit of housekeeping*/
if object_id('table_compare_all_data', 'u') is not null drop table table_compare_all_data; if object_id('temp_table_for_xml_1', 'u') is not null drop table temp_table_for_xml_1; if object_id('temp_table_for_xml_2', 'u') is not null drop table temp_table_for_xml_2;

/*get all columns from first and second, the else part still needs to be coded, suppose i could have used temp tables here...grrr*/
if @column_list is null begin  select  @first_table_columns = stuff(( select distinct top 100 percent '],[' + t.name from sys.columns as t where  object_name(object_id) = @first_table order by '],[' + t.name for xml path('')), 1, 2, '') + ']'  select  @second_table_columns = stuff(( select distinct top 100 percent '],[' + t.name from sys.columns as t where  object_name(object_id) = @second_table order by '],[' + t.name for xml path('')), 1, 2, '') + ']'  if @first_table_columns <> @second_table_columns  begin   raiserror('The two tables do not contain the same number of columns',18,1)   return - 1  end end

/*i suppose for this part i could have used "../ *[local-name()=sql:variable("@varname")]", but i ended up with errors. will look into this later */
set @first_table_columns = replace(@first_table_columns,'['+@id_column+']','['+@id_column+ '] as id_column') set @second_table_columns = replace(@second_table_columns,'['+@id_column+']','['+@id_column+ '] as id_column')
print @first_table_columns print @second_table_columns

/*build dynamic sql to store all data*/
set @sql_all_data = 'with cte_all_data as '+       '( '+        'select '+@first_table_columns+','+char(39)+'first_table'+char(39)+ ' source_table from ['+@first_table+'] '+         'union all '+        'select '+@first_table_columns+','+char(39)+'second_table'+char(39)+' source_table from ['+@second_table+']'+       ')'+      ' select * into table_compare_all_data from cte_all_data'
execute(@sql_all_data)

/*clean up the comma separated column list*/
if @column_list is null begin  set @first_table_columns = replace(@first_table_columns,'['+@id_column+ '] as id_column','id_column')  set @second_table_columns = replace(@second_table_columns,'['+@id_column+ '] as id_column','id_column') end
declare @sql_for_temp_1 varchar(500) declare @sql_for_temp_2 varchar(500)
set @sql_for_temp_1 = ' select top 5 * into temp_table_for_xml_1 from (' +       ' select '+@first_table_columns+' from table_compare_all_data where source_table = '+char(39)+'first_table'+char(39)+        ' except ' +        ' select '+@second_table_columns+' from table_compare_all_data where source_table = '+char(39)+'second_table'+char(39)+' ) as t1'      
set @sql_for_temp_2 = ' select top 15 * into temp_table_for_xml_2 from (' +       ' select '+@second_table_columns+' from table_compare_all_data where source_table = '+char(39)+'second_table'+char(39) +       ' except ' +        ' select '+@first_table_columns+' from table_compare_all_data where source_table = '+char(39)+'first_table'+char(39)+' ) as t1'
execute(@sql_for_temp_1) execute(@sql_for_temp_2)

select @xml_first_table = (select * from (select * from temp_table_for_xml_1) as a for xml raw('xml_first_table')) select @xml_second_table = (select * from (select * from temp_table_for_xml_2) as a for xml raw('xml_second_table'))
--print 'here'
--print @sql_all_data --print @sql_for_temp_2
--select * from table_compare_all_data
--select * from temp_table_for_xml_1
--select * from temp_table_for_xml_2
;with cte1 as (   select    t.c.value('../@id_column', 'nvarchar(20)') as id,    t.c.value('local-name(.)', 'nvarchar(128)') as name,    t.c.value('.', 'nvarchar(max)') as value   from @xml_first_table.nodes('xml_first_table/@*') as t(c)      ), cte2 as (   select    t.c.value('../@id_column', 'nvarchar(20)') as id,    t.c.value('local-name(.)', 'nvarchar(128)') as name,    t.c.value('.', 'nvarchar(max)') as value   from @xml_second_table.nodes('xml_second_table/@*') as t(c)       )
select   isnull(c1.id, c2.id) as key_value,    isnull(c1.name, c2.name) as column_name,    isnull(c1.value,'value not found in table '+ @first_table) as 'value_in_first_table', isnull(c2.value,'value not found in table '+ @second_table) as 'value_in_second_table'  from cte1 as c1   full outer join cte2 as c2 on c2.id = c1.id and c2.name = c1.name  where  not  (   c1.value is null and c2.value is null or   c1.value is not null and c2.value is not null and c1.value = c2.value  )
--table_compare 'user1','user2','uid'