Wednesday, 28 January 2015

Case
I have a lot of dimension packages in SSIS that all insert a default record for unknown dimension values. It's a lot of repetitive and boring work. Is there an alternative for creating an insert query manually?

A typical dimension package

Solution
Instead of creating an insert query manually for each dimension table you could also create a Stored Procedure to do this for you. Instead of the insert query in the Execute SQL Task you execute this Stored Procedure in the Execute SQL Task.

-- TSQL code
USE [datamart]
GO
/****** datamart: StoredProcedure [dbo].[InsertUnknownDimensionRow] ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[InsertUnknownDimensionRow](@TableName nvarchar(128))
AS
BEGIN
-- This Stored Procedure inserts a record in the dimension table
-- for unknown dimension values. It generates an insert statement
-- based on the column datatypes and executes it.
-- The integer column with identity enabled gets the value -1 and
-- all other columns get a default value based on their datatype.
-- Columns with a default value are ignored.
-- Create temporary table for column specs of dimension table
DECLARE @TableSpecs TABLE (
COLUMN_ID int identity,
COLUMN_NAME nvarchar(128),
DATA_TYPE nvarchar(128),
CHARACTER_MAXIMUM_LENGTH int,
COLUMN_IS_IDENTITY bit
)
-- Use the information schema to get column info and insert it
-- to the temporary table.
INSERT @TableSpecs
SELECT C.COLUMN_NAME
, C.DATA_TYPE
, C.CHARACTER_MAXIMUM_LENGTH
, columnproperty(object_id(C.TABLE_SCHEMA + '.' + C.TABLE_NAME)
, C.COLUMN_NAME, 'IsIdentity') AS COLUMN_IS_IDENTITY
FROM INFORMATION_SCHEMA.COLUMNS C
WHERE QUOTENAME(C.TABLE_NAME) = QUOTENAME(@TableName)
AND C.COLUMN_DEFAULT IS NULL
ORDER BY C.ORDINAL_POSITION
-- Variables to keep track of the number of columns
DECLARE @ColumnId INT
SET @ColumnId = -1
DECLARE @ColumnCount INT
SET @ColumnCount = 0
-- Variables to create the insert query
DECLARE @INSERTSTATEMENT_START nvarchar(max)
DECLARE @INSERTSTATEMENT_END nvarchar(max)
SET @INSERTSTATEMENT_START = 'INSERT INTO ' + QUOTENAME(@TableName) + ' ('
SET @INSERTSTATEMENT_END = 'VALUES ('
-- Variables to complete the insert query with
-- extra enable and disable identity statements
-- You could add an extra check in the loop to
-- make sure there is an identity column in the
-- table. Otherwise the SET IDENTITY_INSERT
-- statement will fail.
DECLARE @IDENITYSTATEMENT_ON nvarchar(255)
DECLARE @IDENITYSTATEMENT_OFF nvarchar(255)
SET @IDENITYSTATEMENT_ON = 'SET IDENTITY_INSERT ' + QUOTENAME(@TableName) + ' ON;'
SET @IDENITYSTATEMENT_OFF = 'SET IDENTITY_INSERT ' + QUOTENAME(@TableName) + ' OFF;'
-- Variables filled and use the WHILE loop
DECLARE @COLUMN_NAME VARCHAR(50)
DECLARE @DATA_TYPE VARCHAR(50)
DECLARE @CHARACTER_MAXIMUM_LENGTH INT
DECLARE @COLUMN_IS_IDENTITY BIT
-- WHILE loop to loop through all columns and
-- create a insert query with the columns
WHILE @ColumnId IS NOT NULL
BEGIN
-- Keep track of the number of columns
SELECT @ColumnId = MIN(COLUMN_ID)
, @ColumnCount = @ColumnCount + 1
FROM @TableSpecs
WHERE COLUMN_ID > @ColumnCount
-- Check if there are any columns left
IF @ColumnId IS NULL
BEGIN
-- No columns left, break loop
BREAK
END
ELSE
BEGIN
-- Get info for column number x
SELECT @COLUMN_NAME = COLUMN_NAME
, @DATA_TYPE = DATA_TYPE
, @CHARACTER_MAXIMUM_LENGTH = CHARACTER_MAXIMUM_LENGTH
, @COLUMN_IS_IDENTITY = COLUMN_IS_IDENTITY
FROM @TableSpecs
WHERE COLUMN_ID = @ColumnCount
END
-- Start building the begin of the statement (same for each column)
SET @INSERTSTATEMENT_START = @INSERTSTATEMENT_START + @COLUMN_NAME + ','
-- Start building the end of the statement (the default values)
IF @COLUMN_IS_IDENTITY = 1
BEGIN
-- Default value if the current column is the identity column
SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '-1,'
END
IF @DATA_TYPE IN ('int', 'numeric', 'decimal', 'money', 'float', 'real', 'bigint', 'smallint', 'tinyint', 'smallmoney') AND (@COLUMN_IS_IDENTITY = 0)
BEGIN
-- Default value if the current column is a numeric column,
-- but not an identity: zero
SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '0,'
END
IF @DATA_TYPE IN ('char', 'nchar', 'varchar', 'nvarchar')
BEGIN
-- Default value if the current column is a text column
-- Part of the text "unknown" depending on the length
SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '''' + LEFT('Unknown', @CHARACTER_MAXIMUM_LENGTH) + ''','
END
IF @DATA_TYPE IN ('datetime', 'date', 'timestamp', 'datatime2', 'datetimeoffset', 'smalldatetime', 'time')
BEGIN
-- Default value if the current column is a datetime column
-- First of january 1900
SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '''' + CONVERT(varchar, CONVERT(date, 'Jan 1 1900')) + ''','
END
IF @DATA_TYPE = 'bit'
BEGIN
-- Default value if the current column is a boolean
SET @INSERTSTATEMENT_END = @INSERTSTATEMENT_END + '0,'
END
END
-- Remove last comma from start and end part of the insert statement
SET @INSERTSTATEMENT_START = LEFT(@INSERTSTATEMENT_START, LEN(@INSERTSTATEMENT_START) - 1) + ')'
SET @INSERTSTATEMENT_END = LEFT(@INSERTSTATEMENT_END, LEN(@INSERTSTATEMENT_END) - 1) + ');'
-- Execute the complete statement
EXEC (@IDENITYSTATEMENT_ON + ' ' + @INSERTSTATEMENT_START + ' ' + @INSERTSTATEMENT_END + ' ' + @IDENITYSTATEMENT_OFF)
END
GO
-- Tweak the code for your own needs and standards
-- Optional extra check if you don't want to truncate
-- your dimensions: is there already a default/unknown
-- record available

Execute Stored Procedure

Note: only the most common datatypes are handled. Add more if-statements if you expect data types like varbinary, xml, image or sql_variant