Friday, August 24, 2012

I have been dealing with a table that has 400 million rows. The problem was reading these rows and transforming the results to store into another table. With 400 million rows it was taking several hours just to build up the cursor. The temporary files were 80GB as well.

So, here is a way to read a chunk of data (in this case 2 is a chunk) using an ID to mark our spot.

The "trick" is when there are no more rows, close the current cursor, reset the no more rows variable, update the variable that marks our spot, re-open the cursor and check to see if we found any rows. Pretty simple, pretty slick. :-)

-- --------------------------------------------------------------------------------
-- Routine DDL
-- Note: comments before and after the routine body will not be stored by the server
-- --------------------------------------------------------------------------------
DELIMITER $$
CREATE DEFINER=`root`@`localhost` PROCEDURE `junk`()
BEGIN
DECLARE lastId INT DEFAULT 0;
DECLARE recordID INT;
DECLARE recordText VARCHAR(45);
DECLARE no_more_rows BOOLEAN;
DECLARE loop_cntr INT DEFAULT 0;
DECLARE num_rows INT DEFAULT 0;
-- tablea has two columns, id (INT) and data (varchar(45))
DECLARE mycur CURSOR FOR
SELECT T.id, T.data FROM `test`.`tablea` T WHERE T.id > lastID ORDER BY T.id ASC LIMIT 2 ;
DECLARE CONTINUE HANDLER FOR NOT FOUND
SET no_more_rows = TRUE;
-- 'open' the cursor and capture the number of rows returned
-- (the 'select' gets invoked when the cursor is 'opened')
OPEN mycur;
select FOUND_ROWS() into num_rows;
select num_rows;
the_loop: LOOP
FETCH mycur
INTO recordID
, recordText;
-- break out of the loop if
-- 1) there were no records, or
-- 2) we've processed them all
IF no_more_rows THEN
CLOSE mycur;
SET no_more_rows = FALSE;
set lastId = recordID;
SELECT lastID, '< The last id';
OPEN mycur;
select FOUND_ROWS() into num_rows;
IF num_rows = 0 THEN
CLOSE mycur;
LEAVE the_loop;
ELSE
ITERATE the_loop;
END IF;
ELSE
-- the equivalent of a 'print statement' in a stored procedure
-- it simply displays output for each loop
select recordID, recordText;
END IF;
-- count the number of times looped
SET loop_cntr = loop_cntr + 1;
END LOOP the_loop;
-- 'print' the output so we can see they are the same
select num_rows, loop_cntr;
END