/*
 * raid6check - extended consistency check for RAID-6
 *
 * Copyright (C) 2011 Piergiorgio Sartor
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Piergiorgio Sartor
 * Based on "restripe.c" from "mdadm" codebase
 */

#include "mdadm.h"
#include <stdint.h>
#include <signal.h>
#include <sys/mman.h>

#define CHECK_PAGE_BITS (12)
#define CHECK_PAGE_SIZE (1 << CHECK_PAGE_BITS)

char const Name[] = "raid6check";

enum repair {
	NO_REPAIR = 0,
	MANUAL_REPAIR,
	AUTO_REPAIR
};

int geo_map(int block, unsigned long long stripe, int raid_disks,
	    int level, int layout);
int is_ddf(int layout);
void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
void make_tables(void);
void ensure_zero_has_size(int chunk_size);
void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs,
		       int neg_offset);
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
		       uint8_t **ptrs, int neg_offset);
void xor_blocks(char *target, char **sources, int disks, int size);

/*
 * Collect per stripe consistency information.
 *
 * For every byte i of the chunk the stored parity (chunkP/chunkQ) is
 * compared with the freshly computed one (p/q) and results[i] is set to:
 *   -255  P and Q both match
 *   -1    only P differs
 *   -2    only Q differs
 *   >= 0  both differ: (gflog[Qx] - gflog[Px]) mod 255, which the caller
 *         treats as the syndrome number of the suspect block
 */
void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
		   char *chunkP, char *chunkQ, int *results)
{
	int i;
	int data_id;
	uint8_t Px, Qx;
	extern uint8_t raid6_gflog[];

	for (i = 0; i < chunk_size; i++) {
		Px = (uint8_t)chunkP[i] ^ (uint8_t)p[i];
		Qx = (uint8_t)chunkQ[i] ^ (uint8_t)q[i];

		if (Px == 0 && Qx == 0) {
			results[i] = -255;
		} else if (Qx == 0) {
			results[i] = -1;
		} else if (Px == 0) {
			results[i] = -2;
		} else {
			data_id = raid6_gflog[Qx] - raid6_gflog[Px];
			if (data_id < 0)
				data_id += 255;
			results[i] = data_id;
		}
	}
}

/*
 * Try to find out if a specific disk has problems in a CHECK_PAGE_SIZE
 * page size.
 *
 * results points at CHECK_PAGE_SIZE entries produced by raid6_collect();
 * raid_disks is the exclusive upper bound for a plausible suspect number.
 *
 * Returns:
 *   -255    no mismatching byte in the page
 *   -65535  the mismatching bytes disagree with each other, or one of
 *           them names a suspect >= raid_disks
 *   other   the single suspect (-1, -2 or a syndrome number) that every
 *           mismatching byte agrees on
 */
int raid6_stats_blk(int *results, int raid_disks)
{
	int i;
	int curr_broken_disk = -255;
	int prev_broken_disk = -255;
	int broken_status = 0;

	for (i = 0; i < CHECK_PAGE_SIZE; i++) {
		if (results[i] != -255)
			curr_broken_disk = results[i];

		if (curr_broken_disk >= raid_disks)
			broken_status = 2;

		switch (broken_status) {
		case 0:
			/* nothing seen yet: remember the first suspect */
			if (curr_broken_disk != -255) {
				prev_broken_disk = curr_broken_disk;
				broken_status = 1;
			}
			break;

		case 1:
			/* a second, different suspect makes the page ambiguous */
			if (curr_broken_disk != prev_broken_disk)
				broken_status = 2;
			break;

		case 2:
		default:
			curr_broken_disk = prev_broken_disk = -65535;
			break;
		}
	}

	/*
	 * The 1 -> 2 transition does not reset curr_broken_disk in the same
	 * iteration, so a conflict first seen on the very last byte must be
	 * reported as "unknown" here rather than as the last suspect.
	 */
	if (broken_status == 2)
		return -65535;

	return curr_broken_disk;
}

/*
 * Collect disks status for a strip in CHECK_PAGE_SIZE page size blocks.
 *
 * disk[j] receives the raid6_stats_blk() verdict for page j of the chunk.
 */
void raid6_stats(int *disk, int *results, int raid_disks, int chunk_size)
{
	int i, j;

	for (i = 0, j = 0; i < chunk_size; i += CHECK_PAGE_SIZE, j++)
		disk[j] = raid6_stats_blk(&results[i], raid_disks);
}

/*
 * Prepare for working on stripe 'start': lock the process memory, ignore
 * SIGTERM/SIGINT/SIGQUIT (previous handlers are saved in sig[0..2]) and
 * write the byte range of the stripe to the array's "suspend_lo" and
 * "suspend_hi" sysfs attributes.
 *
 * Returns 2 if mlockall() fails (nothing else has been changed then),
 * otherwise 256 times the OR-ed sysfs_set_num() results.
 */
int lock_stripe(struct mdinfo *info, unsigned long long start,
		int chunk_size, int data_disks, sighandler_t *sig)
{
	int rv;

	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
		return 2;

	sig[0] = signal(SIGTERM, SIG_IGN);
	sig[1] = signal(SIGINT, SIG_IGN);
	sig[2] = signal(SIGQUIT, SIG_IGN);

	rv = sysfs_set_num(info, NULL, "suspend_lo",
			   start * chunk_size * data_disks);
	rv |= sysfs_set_num(info, NULL, "suspend_hi",
			    (start + 1) * chunk_size * data_disks);

	return rv * 256;
}

/*
 * Undo lock_stripe(): reset "suspend_lo"/"suspend_hi", restore the signal
 * handlers saved in sig[0..2] and unlock the process memory.
 *
 * Returns 3 if munlockall() fails, otherwise 256 times the OR-ed
 * sysfs_set_num() results.
 */
int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig)
{
	int rv;

	rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
	rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
	rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);

	signal(SIGQUIT, sig[2]);
	signal(SIGINT, sig[1]);
	signal(SIGTERM, sig[0]);

	if (munlockall() != 0)
		return 3;

	return rv * 256;
}

/*
 * Autorepair
 *
 * For every page j whose verdict disk[j] names a single suspect backed by
 * a real device, recompute that page (Q via qsyndrome(), P or data via
 * xor_blocks()) and write it back to the device.
 *
 * blocks[], blocks_page[] and block_index_for_slot[] are indexed by
 * syndrome number, with -1 and -2 standing for P and Q. 'p' is used as
 * scratch space for the recomputed P when Q is rebuilt.
 *
 * Returns 0 on success, -1 if any page could not be written completely.
 */
int autorepair(int *disk, unsigned long long start, int chunk_size,
	       char *name[], int raid_disks, int syndrome_disks,
	       char **blocks_page, char **blocks, uint8_t *p,
	       int *block_index_for_slot, int *source,
	       unsigned long long *offsets)
{
	int i, j;
	int pages_to_write_count = 0;
	int page_to_write[chunk_size >> CHECK_PAGE_BITS];

	for (j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
		int role = disk[j];

		page_to_write[j] = 0;

		/*
		 * block_index_for_slot[] only covers -2 .. syndrome_disks-1;
		 * anything else is not a repairable verdict.
		 */
		if (role < -2 || role >= syndrome_disks)
			continue;
		if (block_index_for_slot[role] < 0)
			continue;

		int slot = block_index_for_slot[role];

		printf("Auto-repairing slot %d (%s)\n", slot, name[slot]);
		pages_to_write_count++;
		page_to_write[j] = 1;

		for (i = -2; i < syndrome_disks; i++)
			blocks_page[i] = blocks[i] + j * CHECK_PAGE_SIZE;

		if (role == -2) {
			qsyndrome(p, (uint8_t *)blocks_page[-2],
				  (uint8_t **)blocks_page,
				  syndrome_disks, CHECK_PAGE_SIZE);
		} else {
			char *all_but_failed_blocks[syndrome_disks];

			/* P stands in for the block being rebuilt */
			for (i = 0; i < syndrome_disks; i++) {
				if (i == role)
					all_but_failed_blocks[i] = blocks_page[-1];
				else
					all_but_failed_blocks[i] = blocks_page[i];
			}
			xor_blocks(blocks_page[role], all_but_failed_blocks,
				   syndrome_disks, CHECK_PAGE_SIZE);
		}
	}

	if (pages_to_write_count > 0) {
		int write_failed = 0;

		for (j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
			if (page_to_write[j] != 1)
				continue;

			int slot = block_index_for_slot[disk[j]];
			off64_t seek_res;

			seek_res = lseek64(source[slot],
					   offsets[slot] + start * chunk_size +
					   j * CHECK_PAGE_SIZE, SEEK_SET);
			if (seek_res < 0) {
				/*
				 * Never write after a failed seek: the data
				 * would land at whatever offset the
				 * descriptor currently has.
				 */
				fprintf(stderr, "lseek failed for slot %d\n",
					slot);
				write_failed = 1;
				continue;
			}

			if (write(source[slot],
				  blocks[disk[j]] + j * CHECK_PAGE_SIZE,
				  CHECK_PAGE_SIZE) != CHECK_PAGE_SIZE)
				write_failed = 1;
		}

		if (write_failed) {
			fprintf(stderr, "Failed to write a full chunk.\n");
			return -1;
		}
	}

	return 0;
}

/*
 * Manual repair
 *
 * Rebuild the two blocks at syndrome positions failed_slot1/failed_slot2
 * (-1 is P, -2 is Q) of stripe 'start' from the remaining blocks and
 * write both chunks back to their devices.
 *
 * Returns 0 on success, -1 if a seek fails, -2 if a chunk was not written
 * completely.
 */
int manual_repair(int chunk_size, int syndrome_disks,
		  int failed_slot1, int failed_slot2,
		  unsigned long long start, int *block_index_for_slot,
		  char *name[], char **stripes, char **blocks, uint8_t *p,
		  int *source, unsigned long long *offsets)
{
	int i;
	int fd1 = block_index_for_slot[failed_slot1];
	int fd2 = block_index_for_slot[failed_slot2];

	printf("Repairing stripe %llu\n", start);
	printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
	       fd1, name[fd1], fd2, name[fd2]);

	if (failed_slot1 == -2 || failed_slot2 == -2) {
		char *all_but_failed_blocks[syndrome_disks];
		int failed_data_or_p;

		if (failed_slot1 == -2)
			failed_data_or_p = failed_slot2;
		else
			failed_data_or_p = failed_slot1;

		printf("Repairing D/P(%d) and Q\n", failed_data_or_p);

		for (i = 0; i < syndrome_disks; i++) {
			if (i == failed_data_or_p)
				all_but_failed_blocks[i] = blocks[-1];
			else
				all_but_failed_blocks[i] = blocks[i];
		}
		/* rebuild D (or P) first, Q is then recomputed from it */
		xor_blocks(blocks[failed_data_or_p], all_but_failed_blocks,
			   syndrome_disks, chunk_size);
		qsyndrome(p, (uint8_t *)blocks[-2], (uint8_t **)blocks,
			  syndrome_disks, chunk_size);
	} else {
		ensure_zero_has_size(chunk_size);

		if (failed_slot1 == -1 || failed_slot2 == -1) {
			int failed_data;

			if (failed_slot1 == -1)
				failed_data = failed_slot2;
			else
				failed_data = failed_slot1;

			printf("Repairing D(%d) and P\n", failed_data);
			raid6_datap_recov(syndrome_disks + 2, chunk_size,
					  failed_data, (uint8_t **)blocks, 1);
		} else {
			printf("Repairing D and D\n");
			raid6_2data_recov(syndrome_disks + 2, chunk_size,
					  failed_slot1, failed_slot2,
					  (uint8_t **)blocks, 1);
		}
	}

	int write_res1, write_res2;
	off64_t seek_res;

	seek_res = lseek64(source[fd1], offsets[fd1] + start * chunk_size,
			   SEEK_SET);
	if (seek_res < 0) {
		fprintf(stderr, "lseek failed for failed_disk1\n");
		return -1;
	}
	write_res1 = write(source[fd1], blocks[failed_slot1], chunk_size);

	seek_res = lseek64(source[fd2], offsets[fd2] + start * chunk_size,
			   SEEK_SET);
	if (seek_res < 0) {
		fprintf(stderr, "lseek failed for failed_disk2\n");
		return -1;
	}
	write_res2 = write(source[fd2], blocks[failed_slot2], chunk_size);

	if (write_res1 != chunk_size || write_res2 != chunk_size) {
		fprintf(stderr, "Failed to write a complete chunk.\n");
		return -2;
	}

	return 0;
}

/*
 * Check 'length' stripes starting at stripe 'start'.
 *
 * Each stripe is locked, read from all devices, its P and Q are
 * recomputed and compared per CHECK_PAGE_SIZE page, mismatches are
 * reported on stdout and, depending on 'repair', repaired automatically
 * or by rebuilding failed_disk1/failed_disk2.
 *
 * Returns 0 on success, otherwise the first non-zero error produced by
 * locking, reading, repairing or unlocking.
 */
int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
		  int raid_disks, int chunk_size, int level, int layout,
		  unsigned long long start, unsigned long long length,
		  char *name[], enum repair repair,
		  int failed_disk1, int failed_disk2)
{
	/* read the data and p and q blocks, and check we got them right */
	int data_disks = raid_disks - 2;
	int syndrome_disks = data_disks + is_ddf(layout) * 2;
	char *stripe_buf;

	/* stripes[] is indexed by raid_disk and holds chunks from each device */
	char **stripes = xmalloc(raid_disks * sizeof(char *));

	/* blocks[] is indexed by syndrome number and points to either one of the
	 * chunks from 'stripes[]', or to a chunk of zeros. -1 and -2 are
	 * P and Q */
	char **blocks = xmalloc((syndrome_disks + 2) * sizeof(char *));

	/* blocks_page[] is a temporary index to just one page of the chunks
	 * that blocks[] points to. */
	char **blocks_page = xmalloc((syndrome_disks + 2) * sizeof(char *));

	/* block_index_for_slot[] provides the reverse mapping from blocks to stripes.
	 * The index is a syndrome position, the content is a raid_disk number.
	 * indices -1 and -2 work, and are P and Q disks */
	int *block_index_for_slot = xmalloc((syndrome_disks + 2) * sizeof(int));

	/* 'p' and 'q' contain calculated P and Q, to be compared with
	 * blocks[-1] and blocks[-2];
	 */
	uint8_t *p = xmalloc(chunk_size);
	uint8_t *q = xmalloc(chunk_size);
	char *zero = xmalloc(chunk_size);
	int *results = xmalloc(chunk_size * sizeof(int));
	sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));

	int i, j;
	int diskP, diskQ, diskD;
	int err = 0;

	extern int tables_ready;

	if (!tables_ready)
		make_tables();

	if (posix_memalign((void **)&stripe_buf, 4096,
			   raid_disks * chunk_size) != 0)
		exit(4);

	/* shift the arrays so that indices -2 and -1 are valid */
	block_index_for_slot += 2;
	blocks += 2;
	blocks_page += 2;

	memset(zero, 0, chunk_size);
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		/* The syndrome number of the broken disk is recorded
		 * in 'disk[]' which allows a different broken disk for
		 * each page.
		 */
		int disk[chunk_size >> CHECK_PAGE_BITS];

		err = lock_stripe(info, start, chunk_size, data_disks, sig);
		if (err != 0) {
			/* 2 means mlockall() failed: nothing to undo */
			if (err != 2)
				unlock_all_stripes(info, sig);
			goto exitCheck;
		}

		for (i = 0; i < raid_disks; i++) {
			off64_t seek_res = lseek64(source[i],
						   offsets[i] + start * chunk_size,
						   SEEK_SET);
			if (seek_res < 0) {
				fprintf(stderr, "lseek to source %d failed\n", i);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}

			int read_res = read(source[i], stripes[i], chunk_size);
			if (read_res < chunk_size) {
				fprintf(stderr, "Failed to read complete chunk disk %d, aborting\n", i);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}
		}

		diskP = geo_map(-1, start, raid_disks, level, layout);
		block_index_for_slot[-1] = diskP;
		blocks[-1] = stripes[diskP];

		diskQ = geo_map(-2, start, raid_disks, level, layout);
		block_index_for_slot[-2] = diskQ;
		blocks[-2] = stripes[diskQ];

		if (!is_ddf(layout)) {
			/* The syndrome-order of disks starts immediately after 'Q',
			 * but skips P */
			diskD = diskQ;
			for (i = 0; i < data_disks; i++) {
				diskD = diskD + 1;
				if (diskD >= raid_disks)
					diskD = 0;
				if (diskD == diskP)
					diskD += 1;
				if (diskD >= raid_disks)
					diskD = 0;
				blocks[i] = stripes[diskD];
				block_index_for_slot[i] = diskD;
			}
		} else {
			/* The syndrome-order exactly follows raid-disk
			 * numbers, with ZERO in place of P and Q */
			for (i = 0; i < raid_disks; i++) {
				if (i == diskP || i == diskQ) {
					blocks[i] = zero;
					block_index_for_slot[i] = -1;
				} else {
					blocks[i] = stripes[i];
					block_index_for_slot[i] = i;
				}
			}
		}

		qsyndrome(p, q, (uint8_t **)blocks, syndrome_disks, chunk_size);

		raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ],
			      results);
		/*
		 * Limit suspects to syndrome_disks, not raid_disks: disk[] is
		 * used below to index block_index_for_slot[], which only has
		 * entries for -2 .. syndrome_disks-1.
		 */
		raid6_stats(disk, results, syndrome_disks, chunk_size);

		for (j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
			int role = disk[j];

			if (role >= -2) {
				int slot = block_index_for_slot[role];

				if (slot >= 0)
					printf("Error detected at stripe %llu, page %d: possible failed disk slot %d: %d --> %s\n",
					       start, j, role, slot, name[slot]);
				else
					printf("Error detected at stripe %llu, page %d: failed slot %d should be zeros\n",
					       start, j, role);
			} else if (disk[j] == -65535) {
				printf("Error detected at stripe %llu, page %d: disk slot unknown\n",
				       start, j);
			}
		}

		if (repair == AUTO_REPAIR) {
			err = autorepair(disk, start, chunk_size, name,
					 raid_disks, syndrome_disks,
					 blocks_page, blocks, p,
					 block_index_for_slot, source, offsets);
			if (err != 0) {
				unlock_all_stripes(info, sig);
				goto exitCheck;
			}
		}

		if (repair == MANUAL_REPAIR) {
			int failed_slot1 = -1, failed_slot2 = -1;

			for (i = -2; i < syndrome_disks; i++) {
				if (block_index_for_slot[i] == failed_disk1)
					failed_slot1 = i;
				if (block_index_for_slot[i] == failed_disk2)
					failed_slot2 = i;
			}

			err = manual_repair(chunk_size, syndrome_disks,
					    failed_slot1, failed_slot2,
					    start, block_index_for_slot,
					    name, stripes, blocks, p,
					    source, offsets);
			/*
			 * Any failure aborts, including -2 (incomplete
			 * write); otherwise the unlock result below would
			 * overwrite and hide it.
			 */
			if (err != 0) {
				unlock_all_stripes(info, sig);
				goto exitCheck;
			}
		}

		err = unlock_all_stripes(info, sig);
		if (err != 0)
			goto exitCheck;

		length--;
		start++;
	}

exitCheck:

	free(stripe_buf);
	free(stripes);
	free(blocks - 2);
	free(blocks_page - 2);
	free(block_index_for_slot - 2);
	free(p);
	free(q);
	free(zero);
	free(results);
	free(sig);

	return err;
}

/*
 * Parse 'str' as a decimal number.
 *
 * On a parse error (no digits, or trailing characters) *err is set to
 * 'str' and 0 is returned; *err is left untouched on success.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv = strtoull(str, &e, 10);

	if (e == str || *e) {
		*err = str;
		return 0;
	}
	return rv;
}

/*
 * Usage:
 *   raid6check md_device start_stripe length_stripes [autorepair]
 *   raid6check md_device repair stripe failed_slot_1 failed_slot_2
 */
int main(int argc, char *argv[])
{
	/* md_device start length */
	int *fds = NULL;
	char **disk_name = NULL;
	unsigned long long *offsets = NULL;
	int raid_disks = 0;
	int active_disks;
	int chunk_size = 0;
	int layout = -1;
	int level = 6;
	enum repair repair = NO_REPAIR;
	int failed_disk1 = -1;
	int failed_disk2 = -1;
	unsigned long long start, length;
	unsigned long long total_stripes;
	int i;
	int mdfd;
	int rv;
	struct mdinfo *info = NULL, *comp = NULL;
	char *err = NULL;
	int exit_err = 0;
	int close_flag = 0;
	char *prg = strrchr(argv[0], '/');

	if (prg == NULL)
		prg = argv[0];
	else
		prg++;

	if (argc < 4) {
		fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
		fprintf(stderr, " or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
		exit_err = 1;
		goto exitHere;
	}

	mdfd = open(argv[1], O_RDONLY);
	if (mdfd < 0) {
		perror(argv[1]);
		fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
		exit_err = 2;
		goto exitHere;
	}

	info = sysfs_read(mdfd, NULL,
			  GET_LEVEL | GET_LAYOUT | GET_DISKS | GET_STATE |
			  GET_COMPONENT | GET_CHUNK | GET_DEVS | GET_OFFSET |
			  GET_SIZE);

	if (info == NULL) {
		fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
		exit_err = 9;
		goto exitHere;
	}

	if (info->array.level != level) {
		fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
		exit_err = 3;
		goto exitHere;
	}

	if (info->array.failed_disks > 0) {
		fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
		exit_err = 8;
		goto exitHere;
	}

	printf("layout: %d\n", info->array.layout);
	printf("disks: %d\n", info->array.raid_disks);
	printf("component size: %llu\n", info->component_size * 512);
	printf("total stripes: %llu\n",
	       (info->component_size * 512) / info->array.chunk_size);
	printf("chunk size: %d\n", info->array.chunk_size);
	printf("\n");

	/* stop at the end of the device list even if a slot is missing */
	comp = info->devs;
	for (i = 0, active_disks = 0;
	     comp != NULL && active_disks < info->array.raid_disks; i++) {
		printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
		       i, comp->data_offset * 512, comp->component_size * 512,
		       map_dev(comp->disk.major, comp->disk.minor, 0),
		       comp->disk.raid_disk);
		if (comp->disk.raid_disk >= 0)
			active_disks++;
		comp = comp->next;
	}
	printf("\n");

	close(mdfd);

	raid_disks = info->array.raid_disks;
	chunk_size = info->array.chunk_size;
	layout = info->array.layout;
	total_stripes = (info->component_size * 512) / chunk_size;

	if (strcmp(argv[2], "repair") == 0) {
		unsigned long long slot1, slot2;

		if (argc < 6) {
			fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
			exit_err = 1;
			goto exitHere;
		}
		repair = MANUAL_REPAIR;
		start = getnum(argv[3], &err);
		length = 1;
		slot1 = getnum(argv[4], &err);
		slot2 = getnum(argv[5], &err);

		/*
		 * Range-check before narrowing to int, so that a huge
		 * number cannot wrap to a small or negative index.
		 */
		if (slot1 >= (unsigned long long)info->array.raid_disks) {
			fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
			exit_err = 4;
			goto exitHere;
		}
		if (slot2 >= (unsigned long long)info->array.raid_disks) {
			fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
			exit_err = 4;
			goto exitHere;
		}
		failed_disk1 = (int)slot1;
		failed_disk2 = (int)slot2;

		if (failed_disk1 == failed_disk2) {
			fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
			exit_err = 4;
			goto exitHere;
		}
	} else {
		start = getnum(argv[2], &err);
		length = getnum(argv[3], &err);
		if (argc >= 5 && strcmp(argv[4], "autorepair") == 0)
			repair = AUTO_REPAIR;
	}

	if (err) {
		fprintf(stderr, "%s: Bad number: %s\n", prg, err);
		exit_err = 4;
		goto exitHere;
	}

	if (start > total_stripes) {
		start = total_stripes;
		fprintf(stderr, "%s: start beyond disks size\n", prg);
	}

	/* start <= total_stripes here, so the subtraction cannot wrap */
	if (length == 0 || length > total_stripes - start)
		length = total_stripes - start;

	disk_name = xmalloc(raid_disks * sizeof(*disk_name));
	fds = xmalloc(raid_disks * sizeof(*fds));
	offsets = xcalloc(raid_disks, sizeof(*offsets));

	for (i = 0; i < raid_disks; i++)
		fds[i] = -1;
	close_flag = 1;

	comp = info->devs;
	for (i = 0, active_disks = 0;
	     comp != NULL && active_disks < raid_disks; i++) {
		int disk_slot = comp->disk.raid_disk;

		if (disk_slot >= 0) {
			disk_name[disk_slot] = map_dev(comp->disk.major,
						       comp->disk.minor, 0);
			offsets[disk_slot] = comp->data_offset * 512;
			fds[disk_slot] = open(disk_name[disk_slot],
					      O_RDWR | O_DIRECT);
			if (fds[disk_slot] < 0) {
				perror(disk_name[disk_slot]);
				fprintf(stderr, "%s: cannot open %s\n", prg,
					disk_name[disk_slot]);
				exit_err = 6;
				goto exitHere;
			}
			active_disks++;
		}
		comp = comp->next;
	}

	if (active_disks < raid_disks) {
		fprintf(stderr, "%s: only %d of %d component devices found\n",
			prg, active_disks, raid_disks);
		exit_err = 6;
		goto exitHere;
	}

	rv = check_stripes(info, fds, offsets, raid_disks, chunk_size, level,
			   layout, start, length, disk_name, repair,
			   failed_disk1, failed_disk2);
	if (rv != 0) {
		fprintf(stderr, "%s: check_stripes returned %d\n", prg, rv);
		exit_err = 7;
		goto exitHere;
	}

exitHere:

	if (close_flag)
		for (i = 0; i < raid_disks; i++)
			if (fds[i] >= 0)
				close(fds[i]);

	free(disk_name);
	free(fds);
	free(offsets);

	exit(exit_err);
}