/*
 * Copyright (C) 2013 FUJITSU LIMITED.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include "kerncompat.h"
#include "androidcompat.h"

#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <uuid/uuid.h>
#include <pthread.h>

#include "list.h"
#include "radix-tree.h"
#include "ctree.h"
#include "extent-cache.h"
#include "disk-io.h"
#include "volumes.h"
#include "transaction.h"
#include "crc32c.h"
#include "utils.h"
#include "btrfsck.h"
#include "commands.h"

/*
 * Global state of one chunk-recover run: filesystem geometry read from the
 * super block, the caches filled by the device scan, and the lists the
 * chunks are sorted into.  rc_lock serializes updates to the caches from
 * the per-device scan threads.
 */
struct recover_control {
	int verbose;
	int yes;

	u16 csum_size;
	u32 sectorsize;
	u32 nodesize;
	u64 generation;
	u64 chunk_root_generation;

	struct btrfs_fs_devices *fs_devices;

	struct cache_tree chunk;
	struct block_group_tree bg;
	struct device_extent_tree devext;
	struct cache_tree eb_cache;

	struct list_head good_chunks;
	struct list_head bad_chunks;
	struct list_head rebuild_chunks;
	struct list_head unrepaired_chunks;
	pthread_mutex_t rc_lock;
};

/*
 * One tree block seen during the scan, keyed by its logical bytenr, together
 * with every (device, physical offset) copy that was found for it.
 */
struct extent_record {
	struct cache_extent cache;
	u64 generation;
	u8 csum[BTRFS_CSUM_SIZE];
	struct btrfs_device *devices[BTRFS_MAX_MIRRORS];
	u64 offsets[BTRFS_MAX_MIRRORS];
	int nmirrors;
};

/* Per-thread argument and progress indicator of scan_one_device(). */
struct device_scan {
	struct recover_control *rc;
	struct btrfs_device *dev;
	int fd;
	u64 bytenr;
};

/*
 * Allocate an extent_record describing @eb (bytenr, length, generation and
 * checksum).  Exits the program if the allocation fails.
 */
static struct extent_record *btrfs_new_extent_record(struct extent_buffer *eb)
{
	struct extent_record *rec;

	rec = calloc(1, sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "Fail to allocate memory for extent record.\n");
		exit(1);
	}

	rec->cache.start = btrfs_header_bytenr(eb);
	rec->cache.size = eb->len;
	rec->generation = btrfs_header_generation(eb);
	read_extent_buffer(eb, rec->csum, (unsigned long)btrfs_header_csum(eb),
			   BTRFS_CSUM_SIZE);
	return rec;
}

/*
 * Record tree block @eb, found on @device at physical @offset, in @eb_cache.
 *
 * An overlapping record with a newer generation wins; one with the same
 * generation must be identical (then this copy is added as a mirror) or
 * -EEXIST is returned; older overlapping records are dropped.
 * Returns 0 on success or when the block was ignored.
 */
static int process_extent_buffer(struct cache_tree *eb_cache,
				 struct extent_buffer *eb,
				 struct btrfs_device *device, u64 offset)
{
	struct extent_record *rec;
	struct extent_record *exist;
	struct cache_extent *cache;
	int ret = 0;

	rec = btrfs_new_extent_record(eb);
	if (!rec->cache.size)
		goto free_out;
again:
	cache = lookup_cache_extent(eb_cache, rec->cache.start,
				    rec->cache.size);
	if (cache) {
		exist = container_of(cache, struct extent_record, cache);

		if (exist->generation > rec->generation)
			goto free_out;
		if (exist->generation == rec->generation) {
			if (exist->cache.start != rec->cache.start ||
			    exist->cache.size != rec->cache.size ||
			    memcmp(exist->csum, rec->csum, BTRFS_CSUM_SIZE)) {
				ret = -EEXIST;
			} else {
				BUG_ON(exist->nmirrors >= BTRFS_MAX_MIRRORS);
				exist->devices[exist->nmirrors] = device;
				exist->offsets[exist->nmirrors] = offset;
				exist->nmirrors++;
			}
			goto free_out;
		}
		/* The cached record is older: drop it and look again. */
		remove_cache_extent(eb_cache, cache);
		free(exist);
		goto again;
	}

	rec->devices[0] = device;
	rec->offsets[0] = offset;
	rec->nmirrors++;
	ret = insert_cache_extent(eb_cache, &rec->cache);
	BUG_ON(ret);
out:
	return ret;
free_out:
	free(rec);
	goto out;
}

/* Release the extent_record that embeds @cache. */
static void free_extent_record(struct cache_extent *cache)
{
	struct extent_record *er;

	er = container_of(cache, struct extent_record, cache);
	free(er);
}

FREE_EXTENT_CACHE_BASED_TREE(extent_record, free_extent_record);

/*
 * Build a malloc()ed on-disk chunk item from @record.
 * Returns NULL if @record is NULL, has no stripes, or allocation fails.
 * The caller frees the result.
 */
static struct btrfs_chunk *create_chunk_item(struct chunk_record *record)
{
	struct btrfs_chunk *ret;
	struct btrfs_stripe *chunk_stripe;
	int i;

	if (!record || record->num_stripes == 0)
		return NULL;
	ret = malloc(btrfs_chunk_item_size(record->num_stripes));
	if (!ret)
		return NULL;
	btrfs_set_stack_chunk_length(ret, record->length);
	btrfs_set_stack_chunk_owner(ret, record->owner);
	btrfs_set_stack_chunk_stripe_len(ret, record->stripe_len);
	btrfs_set_stack_chunk_type(ret, record->type_flags);
	btrfs_set_stack_chunk_io_align(ret, record->io_align);
	btrfs_set_stack_chunk_io_width(ret, record->io_width);
	btrfs_set_stack_chunk_sector_size(ret, record->sector_size);
	btrfs_set_stack_chunk_num_stripes(ret, record->num_stripes);
	btrfs_set_stack_chunk_sub_stripes(ret, record->sub_stripes);
	for (i = 0, chunk_stripe = &ret->stripe; i < record->num_stripes;
	     i++, chunk_stripe++) {
		btrfs_set_stack_stripe_devid(chunk_stripe,
					     record->stripes[i].devid);
		btrfs_set_stack_stripe_offset(chunk_stripe,
					      record->stripes[i].offset);
		memcpy(chunk_stripe->dev_uuid, record->stripes[i].dev_uuid,
		       BTRFS_UUID_SIZE);
	}
	return ret;
}

/* Zero @rc and initialize its caches, lists, options and lock. */
static void init_recover_control(struct recover_control *rc, int verbose,
				 int yes)
{
	memset(rc, 0, sizeof(struct recover_control));
	cache_tree_init(&rc->chunk);
	cache_tree_init(&rc->eb_cache);
	block_group_tree_init(&rc->bg);
	device_extent_tree_init(&rc->devext);

	INIT_LIST_HEAD(&rc->good_chunks);
	INIT_LIST_HEAD(&rc->bad_chunks);
	INIT_LIST_HEAD(&rc->rebuild_chunks);
	INIT_LIST_HEAD(&rc->unrepaired_chunks);

	rc->verbose = verbose;
	rc->yes = yes;
	pthread_mutex_init(&rc->rc_lock, NULL);
}

/* Free the caches owned by @rc and destroy its lock. */
static void free_recover_control(struct recover_control *rc)
{
	free_block_group_tree(&rc->bg);
	free_chunk_cache_tree(&rc->chunk);
	free_device_extent_tree(&rc->devext);
	free_extent_record_tree(&rc->eb_cache);
	pthread_mutex_destroy(&rc->rc_lock);
}

/*
 * Record the block group item at @slot of @leaf in @bg_cache, keeping only
 * the newest generation of overlapping records.
 * Returns -EEXIST if a same-generation record differs, 0 otherwise.
 */
static int process_block_group_item(struct block_group_tree *bg_cache,
				    struct extent_buffer *leaf,
				    struct btrfs_key *key, int slot)
{
	struct block_group_record *rec;
	struct block_group_record *exist;
	struct cache_extent *cache;
	int ret = 0;

	rec = btrfs_new_block_group_record(leaf, key, slot);
	if (!rec->cache.size)
		goto free_out;
again:
	cache = lookup_cache_extent(&bg_cache->tree, rec->cache.start,
				    rec->cache.size);
	if (cache) {
		exist = container_of(cache, struct block_group_record, cache);

		/* check the generation and replace if needed */
		if (exist->generation > rec->generation)
			goto free_out;
		if (exist->generation == rec->generation) {
			int offset = offsetof(struct block_group_record,
					      generation);
			/*
			 * According to the current kernel code, the following
			 * case is impossible, or there is something wrong in
			 * the kernel code.
			 */
			if (memcmp(((void *)exist) + offset,
				   ((void *)rec) + offset,
				   sizeof(*rec) - offset))
				ret = -EEXIST;
			goto free_out;
		}
		remove_cache_extent(&bg_cache->tree, cache);
		list_del_init(&exist->list);
		free(exist);
		/*
		 * We must do search again to avoid the following cache.
		 * /--old bg 1--//--old bg 2--/
		 *        /--new bg--/
		 */
		goto again;
	}

	ret = insert_block_group_record(bg_cache, rec);
	BUG_ON(ret);
out:
	return ret;
free_out:
	free(rec);
	goto out;
}

/*
 * Record the chunk item at @slot of @leaf in @chunk_cache, keeping only the
 * newest generation of overlapping records.
 * Returns -EEXIST if a same-generation record differs, 0 otherwise.
 */
static int process_chunk_item(struct cache_tree *chunk_cache,
			      struct extent_buffer *leaf, struct btrfs_key *key,
			      int slot)
{
	struct chunk_record *rec;
	struct chunk_record *exist;
	struct cache_extent *cache;
	int ret = 0;

	rec = btrfs_new_chunk_record(leaf, key, slot);
	if (!rec->cache.size)
		goto free_out;
again:
	cache = lookup_cache_extent(chunk_cache, rec->offset, rec->length);
	if (cache) {
		exist = container_of(cache, struct chunk_record, cache);

		if (exist->generation > rec->generation)
			goto free_out;
		if (exist->generation == rec->generation) {
			int num_stripes = rec->num_stripes;
			int rec_size = btrfs_chunk_record_size(num_stripes);
			int offset = offsetof(struct chunk_record, generation);

			if (exist->num_stripes != rec->num_stripes ||
			    memcmp(((void *)exist) + offset,
				   ((void *)rec) + offset,
				   rec_size - offset))
				ret = -EEXIST;
			goto free_out;
		}
		remove_cache_extent(chunk_cache, cache);
		free(exist);
		goto again;
	}
	ret = insert_cache_extent(chunk_cache, &rec->cache);
	BUG_ON(ret);
out:
	return ret;
free_out:
	free(rec);
	goto out;
}

/*
 * Record the device extent item at @slot of @leaf in @devext_cache, keeping
 * only the newest generation of overlapping records.
 * Returns -EEXIST if a same-generation record differs, 0 otherwise.
 */
static int process_device_extent_item(struct device_extent_tree *devext_cache,
				      struct extent_buffer *leaf,
				      struct btrfs_key *key, int slot)
{
	struct device_extent_record *rec;
	struct device_extent_record *exist;
	struct cache_extent *cache;
	int ret = 0;

	rec = btrfs_new_device_extent_record(leaf, key, slot);
	if (!rec->cache.size)
		goto free_out;
again:
	cache = lookup_cache_extent2(&devext_cache->tree,
				     rec->cache.objectid,
				     rec->cache.start,
				     rec->cache.size);
	if (cache) {
		exist = container_of(cache, struct device_extent_record, cache);
		if (exist->generation > rec->generation)
			goto free_out;
		if (exist->generation == rec->generation) {
			int offset = offsetof(struct device_extent_record,
					      generation);
			if (memcmp(((void *)exist) + offset,
				   ((void *)rec) + offset,
				   sizeof(*rec) - offset))
				ret = -EEXIST;
			goto free_out;
		}
		remove_cache_extent(&devext_cache->tree, cache);
		list_del_init(&exist->chunk_list);
		list_del_init(&exist->device_list);
		free(exist);
		goto again;
	}

	ret = insert_device_extent_record(devext_cache, rec);
	BUG_ON(ret);
out:
	return ret;
free_out:
	free(rec);
	goto out;
}

/* Print one block group record, preceded by the optional @prefix. */
static void print_block_group_info(struct block_group_record *rec, char *prefix)
{
	if (prefix)
		printf("%s", prefix);
	printf("Block Group: start = %llu, len = %llu, flag = %llx\n",
	       rec->objectid, rec->offset, rec->flags);
}

/* Print every block group record stored in @tree. */
static void print_block_group_tree(struct block_group_tree *tree)
{
	struct cache_extent *cache;
	struct block_group_record *rec;

	printf("All Block Groups:\n");
	for (cache = first_cache_extent(&tree->tree); cache;
	     cache = next_cache_extent(cache)) {
		rec = container_of(cache, struct block_group_record, cache);
		print_block_group_info(rec, "\t");
	}
	printf("\n");
}

/* Print stripe @index, preceded by the optional @prefix1 and @prefix2. */
static void print_stripe_info(struct stripe *data, char *prefix1, char *prefix2,
			      int index)
{
	if (prefix1)
		printf("%s", prefix1);
	if (prefix2)
		printf("%s", prefix2);
	printf("[%2d] Stripe: devid = %llu, offset = %llu\n",
	       index, data->devid, data->offset);
}

/* Print a chunk record and its stripes (no block group / device extents). */
static void print_chunk_self_info(struct chunk_record *rec, char *prefix)
{
	int i;

	if (prefix)
		printf("%s", prefix);
	printf("Chunk: start = %llu, len = %llu, type = %llx, num_stripes = %u\n",
	       rec->offset, rec->length, rec->type_flags, rec->num_stripes);
	if (prefix)
		printf("%s", prefix);
	printf(" Stripes list:\n");
	for (i = 0; i < rec->num_stripes; i++)
		print_stripe_info(&rec->stripes[i], prefix, " ", i);
}

/* Print every chunk record stored in @tree. */
static void print_chunk_tree(struct cache_tree *tree)
{
	struct cache_extent *n;
	struct chunk_record *entry;

	printf("All Chunks:\n");
	for (n = first_cache_extent(tree); n; n = next_cache_extent(n)) {
		entry = container_of(n, struct chunk_record, cache);
		print_chunk_self_info(entry, "\t");
	}
	printf("\n");
}

/* Print one device extent record, preceded by the optional @prefix. */
static void print_device_extent_info(struct device_extent_record *rec,
				     char *prefix)
{
	if (prefix)
		printf("%s", prefix);
	printf("Device extent: devid = %llu, start = %llu, len = %llu, chunk offset = %llu\n",
	       rec->objectid, rec->offset, rec->length, rec->chunk_offset);
}

/* Print every device extent record stored in @tree. */
static void print_device_extent_tree(struct device_extent_tree *tree)
{
	struct cache_extent *n;
	struct device_extent_record *entry;

	printf("All Device Extents:\n");
	for (n = first_cache_extent(&tree->tree); n;
	     n = next_cache_extent(n)) {
		entry = container_of(n, struct device_extent_record, cache);
		print_device_extent_info(entry, "\t");
	}
	printf("\n");
}

/* In verbose mode, dump everything the device scan collected. */
static void print_scan_result(struct recover_control *rc)
{
	if (!rc->verbose)
		return;

	printf("DEVICE SCAN RESULT:\n");
	printf("Filesystem Information:\n");
	/* sectorsize and nodesize are u32, so print them unsigned. */
	printf("\tsectorsize: %u\n", rc->sectorsize);
	printf("\tnodesize: %u\n", rc->nodesize);
	printf("\ttree root generation: %llu\n", rc->generation);
	printf("\tchunk root generation: %llu\n", rc->chunk_root_generation);
	printf("\n");

	print_all_devices(&rc->fs_devices->devices);
	print_block_group_tree(&rc->bg);
	print_chunk_tree(&rc->chunk);
	print_device_extent_tree(&rc->devext);
}

/* Print a chunk together with its block group and device extents. */
static void print_chunk_info(struct chunk_record *chunk, char *prefix)
{
	struct device_extent_record *devext;
	int i;

	print_chunk_self_info(chunk, prefix);
	if (prefix)
		printf("%s", prefix);
	if (chunk->bg_rec)
		print_block_group_info(chunk->bg_rec, " ");
	else
		printf(" No block group.\n");
	if (prefix)
		printf("%s", prefix);
	if (list_empty(&chunk->dextents)) {
		printf(" No device extent.\n");
	} else {
		printf(" Device extent list:\n");
		i = 0;
		list_for_each_entry(devext, &chunk->dextents, chunk_list) {
			if (prefix)
				printf("%s", prefix);
			printf("%s[%2d]", " ", i);
			print_device_extent_info(devext, NULL);
			i++;
		}
	}
}

/*
 * In verbose mode, print the chunk classification: good, rebuild and
 * unrepaired chunks are listed as recoverable, bad chunks as unrecoverable,
 * followed by the orphan block groups and device extents.
 */
static void print_check_result(struct recover_control *rc)
{
	struct chunk_record *chunk;
	struct block_group_record *bg;
	struct device_extent_record *devext;
	int total = 0;
	int good = 0;
	int bad = 0;

	if (!rc->verbose)
		return;

	printf("CHECK RESULT:\n");
	printf("Recoverable Chunks:\n");
	list_for_each_entry(chunk, &rc->good_chunks, list) {
		print_chunk_info(chunk, " ");
		good++;
		total++;
	}
	list_for_each_entry(chunk, &rc->rebuild_chunks, list) {
		print_chunk_info(chunk, " ");
		good++;
		total++;
	}
	list_for_each_entry(chunk, &rc->unrepaired_chunks, list) {
		print_chunk_info(chunk, " ");
		good++;
		total++;
	}
	printf("Unrecoverable Chunks:\n");
	list_for_each_entry(chunk, &rc->bad_chunks, list) {
		print_chunk_info(chunk, " ");
		bad++;
		total++;
	}
	printf("\n");
	printf("Total Chunks:\t\t%d\n", total);
	printf(" Recoverable:\t\t%d\n", good);
	printf(" Unrecoverable:\t%d\n", bad);

	printf("\n");
	printf("Orphan Block Groups:\n");
	list_for_each_entry(bg, &rc->bg.block_groups, list)
		print_block_group_info(bg, " ");

	printf("\n");
	printf("Orphan Device Extents:\n");
	list_for_each_entry(devext, &rc->devext.no_chunk_orphans, chunk_list)
		print_device_extent_info(devext, " ");
	printf("\n");
}

/*
 * Cross-check @chunk against the on-disk device tree (skipped when @bg_only
 * is set) and the block group item in the extent tree.
 *
 * Returns 0 if everything matches, -ENOENT if an item is missing or does not
 * match, or the negative error of a failed tree search.
 */
static int check_chunk_by_metadata(struct recover_control *rc,
				   struct btrfs_root *root,
				   struct chunk_record *chunk, int bg_only)
{
	int ret;
	int i;
	int slot;
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_root *dev_root;
	struct stripe *stripe;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_block_group_item *bg_ptr;
	struct extent_buffer *l;

	btrfs_init_path(&path);

	if (bg_only)
		goto bg_check;

	dev_root = root->fs_info->dev_root;
	for (i = 0; i < chunk->num_stripes; i++) {
		stripe = &chunk->stripes[i];

		key.objectid = stripe->devid;
		key.offset = stripe->offset;
		key.type = BTRFS_DEV_EXTENT_KEY;

		ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
		if (ret < 0) {
			fprintf(stderr, "Search device extent failed(%d)\n",
				ret);
			btrfs_release_path(&path);
			return ret;
		} else if (ret > 0) {
			if (rc->verbose)
				fprintf(stderr,
					"No device extent[%llu, %llu]\n",
					stripe->devid, stripe->offset);
			btrfs_release_path(&path);
			return -ENOENT;
		}
		l = path.nodes[0];
		slot = path.slots[0];
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		if (chunk->offset !=
		    btrfs_dev_extent_chunk_offset(l, dev_extent)) {
			if (rc->verbose)
				fprintf(stderr,
					"Device tree mismatch with chunks dev_extent[%llu, %llu], chunk[%llu, %llu]\n",
					btrfs_dev_extent_chunk_offset(l,
								dev_extent),
					btrfs_dev_extent_length(l, dev_extent),
					chunk->offset, chunk->length);
			btrfs_release_path(&path);
			return -ENOENT;
		}
		btrfs_release_path(&path);
	}

bg_check:
	key.objectid = chunk->offset;
	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	key.offset = chunk->length;

	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
				0, 0);
	if (ret < 0) {
		fprintf(stderr, "Search block group failed(%d)\n", ret);
		btrfs_release_path(&path);
		return ret;
	} else if (ret > 0) {
		if (rc->verbose)
			fprintf(stderr, "No block group[%llu, %llu]\n",
				key.objectid, key.offset);
		btrfs_release_path(&path);
		return -ENOENT;
	}

	l = path.nodes[0];
	slot = path.slots[0];
	bg_ptr = btrfs_item_ptr(l, slot, struct btrfs_block_group_item);
	if (chunk->type_flags != btrfs_disk_block_group_flags(l, bg_ptr)) {
		if (rc->verbose)
			fprintf(stderr,
				"Chunk[%llu, %llu]'s type(%llu) is different with Block Group's type(%llu)\n",
				chunk->offset, chunk->length, chunk->type_flags,
				btrfs_disk_block_group_flags(l, bg_ptr));
		btrfs_release_path(&path);
		return -ENOENT;
	}
	btrfs_release_path(&path);
	return 0;
}

/*
 * Run check_chunk_by_metadata() over the good, unrepaired and bad chunk
 * lists.  Good/unrepaired chunks that fail with -ENOENT are moved to
 * bad_chunks.  A bad chunk that unexpectedly passes yields -EINVAL.
 * Returns the first other error seen, or 0.
 */
static int check_all_chunks_by_metadata(struct recover_control *rc,
					struct btrfs_root *root)
{
	struct chunk_record *chunk;
	struct chunk_record *next;
	LIST_HEAD(orphan_chunks);
	int ret = 0;
	int err;

	list_for_each_entry_safe(chunk, next, &rc->good_chunks, list) {
		err = check_chunk_by_metadata(rc, root, chunk, 0);
		if (err) {
			if (err == -ENOENT)
				list_move_tail(&chunk->list, &orphan_chunks);
			else if (err && !ret)
				ret = err;
		}
	}

	list_for_each_entry_safe(chunk, next, &rc->unrepaired_chunks, list) {
		err = check_chunk_by_metadata(rc, root, chunk, 1);
		if (err == -ENOENT)
			list_move_tail(&chunk->list, &orphan_chunks);
		else if (err && !ret)
			ret = err;
	}

	list_for_each_entry(chunk, &rc->bad_chunks, list) {
		err = check_chunk_by_metadata(rc, root, chunk, 1);
		if (err != -ENOENT && !ret)
			ret = err ? err : -EINVAL;
	}
	list_splice(&orphan_chunks, &rc->bad_chunks);
	return ret;
}

/*
 * Walk the items of @leaf and feed block group, chunk and device extent
 * items to the matching cache under rc->rc_lock.
 * Stops at and returns the first non-zero result.
 */
static int extract_metadata_record(struct recover_control *rc,
				   struct extent_buffer *leaf)
{
	struct btrfs_key key;
	int ret = 0;
	int i;
	u32 nritems;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_BLOCK_GROUP_ITEM_KEY:
			pthread_mutex_lock(&rc->rc_lock);
			ret = process_block_group_item(&rc->bg, leaf, &key, i);
			pthread_mutex_unlock(&rc->rc_lock);
			break;
		case BTRFS_CHUNK_ITEM_KEY:
			pthread_mutex_lock(&rc->rc_lock);
			ret = process_chunk_item(&rc->chunk, leaf, &key, i);
			pthread_mutex_unlock(&rc->rc_lock);
			break;
		case BTRFS_DEV_EXTENT_KEY:
			pthread_mutex_lock(&rc->rc_lock);
			ret = process_device_extent_item(&rc->devext, leaf,
							 &key, i);
			pthread_mutex_unlock(&rc->rc_lock);
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Return 1 if @offset is the location of one of the super block copies. */
static inline int is_super_block_address(u64 offset)
{
	int i;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		if (offset == btrfs_sb_offset(i))
			return 1;
	}
	return 0;
}

/*
 * Thread body: scan one device for tree blocks of this filesystem.
 *
 * @dev_scan_struct is a struct device_scan; its bytenr field is updated as
 * progress indicator.  Every block whose fsid and checksum match is recorded
 * in rc->eb_cache, and leaves of the extent, device and chunk trees are
 * additionally parsed by extract_metadata_record().
 *
 * The function owns dev_scan->fd and closes it on every return path.
 * Returns 0 on success, non-zero on failure.
 */
static int scan_one_device(void *dev_scan_struct)
{
	struct extent_buffer *buf = NULL;
	u64 bytenr;
	int ret = 0;
	struct device_scan *dev_scan = (struct device_scan *)dev_scan_struct;
	struct recover_control *rc = dev_scan->rc;
	struct btrfs_device *device = dev_scan->dev;
	int fd = dev_scan->fd;
	int oldtype;

	ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &oldtype);
	if (ret) {
		ret = 1;
		goto out;
	}

	buf = malloc(sizeof(*buf) + rc->nodesize);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}
	buf->len = rc->nodesize;

	bytenr = 0;
	while (1) {
		dev_scan->bytenr = bytenr;

		if (is_super_block_address(bytenr))
			bytenr += rc->sectorsize;

		/* A short read means the end of the device was reached. */
		if (pread64(fd, buf->data, rc->nodesize, bytenr) <
		    rc->nodesize)
			break;

		if (memcmp_extent_buffer(buf, rc->fs_devices->fsid,
					 btrfs_header_fsid(),
					 BTRFS_FSID_SIZE)) {
			bytenr += rc->sectorsize;
			continue;
		}

		if (verify_tree_block_csum_silent(buf, rc->csum_size)) {
			bytenr += rc->sectorsize;
			continue;
		}

		pthread_mutex_lock(&rc->rc_lock);
		ret = process_extent_buffer(&rc->eb_cache, buf, device, bytenr);
		pthread_mutex_unlock(&rc->rc_lock);
		if (ret)
			goto out;

		if (btrfs_header_level(buf) != 0)
			goto next_node;

		switch (btrfs_header_owner(buf)) {
		case BTRFS_EXTENT_TREE_OBJECTID:
		case BTRFS_DEV_TREE_OBJECTID:
			/* different tree use different generation */
			if (btrfs_header_generation(buf) > rc->generation)
				break;
			ret = extract_metadata_record(rc, buf);
			if (ret)
				goto out;
			break;
		case BTRFS_CHUNK_TREE_OBJECTID:
			if (btrfs_header_generation(buf) >
			    rc->chunk_root_generation)
				break;
			ret = extract_metadata_record(rc, buf);
			if (ret)
				goto out;
			break;
		}
next_node:
		bytenr += rc->nodesize;
	}
out:
	close(fd);
	free(buf);
	return ret;
}

/*
 * Open every device of the filesystem and scan them in parallel, one thread
 * per device, printing a progress line once a second.
 *
 * File descriptors are handed over to the scan threads, which close them;
 * descriptors that never reach a thread are closed here.
 * Returns 0 on success, 1 on failure, -ENOMEM if the bookkeeping arrays
 * cannot be allocated.
 */
static int scan_devices(struct recover_control *rc)
{
	int ret = 0;
	int fd;
	struct btrfs_device *dev;
	struct device_scan *dev_scans;
	pthread_t *t_scans;
	long *t_rets;
	int devnr = 0;
	int devidx = 0;
	int i;
	int j;
	int all_done;

	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list)
		devnr++;
	dev_scans = (struct device_scan *)malloc(sizeof(struct device_scan)
						 * devnr);
	if (!dev_scans)
		return -ENOMEM;
	t_scans = (pthread_t *)malloc(sizeof(pthread_t) * devnr);
	if (!t_scans) {
		free(dev_scans);
		return -ENOMEM;
	}
	t_rets = (long *)malloc(sizeof(long) * devnr);
	if (!t_rets) {
		free(dev_scans);
		free(t_scans);
		return -ENOMEM;
	}

	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
		fd = open(dev->name, O_RDONLY);
		if (fd < 0) {
			fprintf(stderr, "Failed to open device %s\n",
				dev->name);
			/* No thread owns the earlier descriptors yet. */
			while (devidx-- > 0)
				close(dev_scans[devidx].fd);
			ret = 1;
			goto out2;
		}
		dev_scans[devidx].rc = rc;
		dev_scans[devidx].dev = dev;
		dev_scans[devidx].fd = fd;
		dev_scans[devidx].bytenr = -1;
		devidx++;
	}

	for (i = 0; i < devidx; i++) {
		ret = pthread_create(&t_scans[i], NULL,
				     (void *)scan_one_device,
				     (void *)&dev_scans[i]);
		if (ret) {
			/* Descriptors not handed to a thread stay ours. */
			for (j = i; j < devidx; j++)
				close(dev_scans[j].fd);
			goto out1;
		}

		dev_scans[i].bytenr = 0;
	}

	while (1) {
		all_done = 1;
		for (i = 0; i < devidx; i++) {
			/* bytenr == -1 marks a thread already joined. */
			if (dev_scans[i].bytenr == -1)
				continue;
			ret = pthread_tryjoin_np(t_scans[i],
						 (void **)&t_rets[i]);
			if (ret == EBUSY) {
				all_done = 0;
				continue;
			}
			if (ret || t_rets[i]) {
				ret = 1;
				goto out1;
			}
			dev_scans[i].bytenr = -1;
		}

		printf("\rScanning: ");
		for (i = 0; i < devidx; i++) {
			if (dev_scans[i].bytenr == -1)
				printf("%sDONE in dev%d",
				       i ? ", " : "", i);
			else
				printf("%s%llu in dev%d",
				       i ? ", " : "", dev_scans[i].bytenr, i);
		}
		/*
		 * clear chars if exist in tail: the padding must be as wide
		 * as the run of backspaces that follows it
		 */
		printf("                ");
		printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
		fflush(stdout);

		if (all_done) {
			printf("\n");
			break;
		}

		sleep(1);
	}
out1:
	for (i = 0; i < devidx; i++) {
		if (dev_scans[i].bytenr == -1)
			continue;
		pthread_cancel(t_scans[i]);
	}
out2:
	free(dev_scans);
	free(t_scans);
	free(t_rets);
	return !!ret;
}

/*
 * Insert a map_lookup built from @chunk into the mapping tree of @root's
 * fs_info.  Returns -ENOMEM, -EIO if a stripe's device cannot be found, or
 * the result of insert_cache_extent(); the map is freed on every failure.
 */
static int build_device_map_by_chunk_record(struct btrfs_root *root,
					    struct chunk_record *chunk)
{
	int ret = 0;
	int i;
	u64 devid;
	u8 uuid[BTRFS_UUID_SIZE];
	u16 num_stripes;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_mapping_tree *map_tree;
	struct map_lookup *map;
	struct stripe *stripe;

	map_tree = &fs_info->mapping_tree;
	num_stripes = chunk->num_stripes;
	map = malloc(btrfs_map_lookup_size(num_stripes));
	if (!map)
		return -ENOMEM;
	map->ce.start = chunk->offset;
	map->ce.size = chunk->length;
	map->num_stripes = num_stripes;
	map->io_width = chunk->io_width;
	map->io_align = chunk->io_align;
	map->sector_size = chunk->sector_size;
	map->stripe_len = chunk->stripe_len;
	map->type = chunk->type_flags;
	map->sub_stripes = chunk->sub_stripes;

	for (i = 0, stripe = chunk->stripes; i < num_stripes; i++, stripe++) {
		devid = stripe->devid;
		memcpy(uuid, stripe->dev_uuid, BTRFS_UUID_SIZE);
		map->stripes[i].physical = stripe->offset;
		map->stripes[i].dev = btrfs_find_device(fs_info, devid,
							uuid, NULL);
		if (!map->stripes[i].dev) {
			free(map);
			return -EIO;
		}
	}

	ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
	/* The tree did not take ownership, so do not leak the map. */
	if (ret)
		free(map);
	return ret;
}

/* Build mapping-tree entries for all good and rebuild chunks. */
static int build_device_maps_by_chunk_records(struct recover_control *rc,
					      struct btrfs_root *root)
{
	int ret = 0;
	struct chunk_record *chunk;

	list_for_each_entry(chunk, &rc->good_chunks, list) {
		ret = build_device_map_by_chunk_record(root, chunk);
		if (ret)
			return ret;
	}
	list_for_each_entry(chunk, &rc->rebuild_chunks, list) {
		ret = build_device_map_by_chunk_record(root, chunk);
		if (ret)
			return ret;
	}
	return ret;
}

/*
 * Delete every item of the extent tree that lies inside block group @bg,
 * except the block group items themselves.
 * Returns 0 on success or a negative error from the tree operations.
 */
static int block_group_remove_all_extent_items(struct btrfs_trans_handle *trans,
					       struct btrfs_root *root,
					       struct block_group_record *bg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	u64 start = bg->objectid;
	u64 end = bg->objectid + bg->offset;
	u64 old_val;
	int nitems;
	int ret;
	int i;
	int del_s, del_nr;

	btrfs_init_path(&path);
	root = root->fs_info->extent_root;

	key.objectid = start;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
again:
	ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
	if (ret < 0)
		goto err;
	else if (ret > 0)
		ret = 0;

	leaf = path.nodes[0];
	nitems = btrfs_header_nritems(leaf);
	if (!nitems) {
		/* The tree is empty. */
		ret = 0;
		goto err;
	}

	if (path.slots[0] >= nitems) {
		ret = btrfs_next_leaf(root, &path);
		if (ret < 0)
			goto err;
		if (ret > 0) {
			ret = 0;
			goto err;
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, 0);
		if (key.objectid >= end)
			goto err;
		btrfs_release_path(&path);
		goto again;
	}

	/* Collect one contiguous run [del_s, del_s + del_nr) to delete. */
	del_nr = 0;
	del_s = -1;
	for (i = path.slots[0]; i < nitems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.objectid >= end)
			break;

		if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			if (del_nr == 0)
				continue;
			else
				break;
		}

		if (del_s == -1)
			del_s = i;
		del_nr++;
		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			/*
			 * NOTE(review): bytes_used is increased although the
			 * extent is being removed -- confirm this is intended.
			 */
			old_val = btrfs_super_bytes_used(fs_info->super_copy);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				old_val += fs_info->nodesize;
			else
				old_val += key.offset;
			btrfs_set_super_bytes_used(fs_info->super_copy,
						   old_val);
		}
	}

	if (del_nr) {
		ret = btrfs_del_items(trans, root, &path, del_s, del_nr);
		if (ret)
			goto err;
	}

	if (key.objectid < end) {
		if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			key.objectid += fs_info->sectorsize;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = 0;
		}
		btrfs_release_path(&path);
		goto again;
	}
err:
	btrfs_release_path(&path);
	return ret;
}

/*
 * Mark the whole range of the cached block group at bg->objectid dirty and
 * free, and reset its used counter to 0.
 * Returns -ENOENT if no such block group is cached.
 */
static int block_group_free_all_extent(struct btrfs_root *root,
				       struct block_group_record *bg)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info;
	u64 start;
	u64 end;

	info = root->fs_info;
	cache = btrfs_lookup_block_group(info, bg->objectid);
	if (!cache)
		return -ENOENT;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	set_extent_bits(&info->block_group_cache, start, end,
			BLOCK_GROUP_DIRTY);
	set_extent_dirty(&info->free_space_cache, start, end);

	btrfs_set_block_group_used(&cache->item, 0);

	return 0;
}

/* Drop all extent items of every good SYSTEM chunk's block group. */
static int remove_chunk_extent_item(struct btrfs_trans_handle *trans,
				    struct recover_control *rc,
				    struct btrfs_root *root)
{
	struct chunk_record *chunk;
	int ret = 0;

	list_for_each_entry(chunk, &rc->good_chunks, list) {
		if (!(chunk->type_flags & BTRFS_BLOCK_GROUP_SYSTEM))
			continue;
		ret = block_group_remove_all_extent_items(trans, root,
							  chunk->bg_rec);
		if (ret)
			return ret;

		ret = block_group_free_all_extent(root, chunk->bg_rec);
		if (ret)
			return ret;
	}
	return ret;
}

/*
 * Allocate a fresh, empty leaf and install it as the node of @root (the
 * chunk root).  Returns 0 or the error of the block allocation.
 */
static int __rebuild_chunk_root(struct btrfs_trans_handle *trans,
				struct recover_control *rc,
				struct btrfs_root *root)
{
	u64 min_devid = -1;
	struct btrfs_device *dev;
	struct extent_buffer *cow;
	struct btrfs_disk_key disk_key;
	int ret = 0;

	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
		if (min_devid > dev->devid)
			min_devid = dev->devid;
	}
	btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID);
	btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY);
	btrfs_set_disk_key_offset(&disk_key, min_devid);

	cow = btrfs_alloc_free_block(trans, root, root->fs_info->nodesize,
				     BTRFS_CHUNK_TREE_OBJECTID,
				     &disk_key, 0, 0, 0);
	/* Never touch the header of a block that was not allocated. */
	if (IS_ERR(cow))
		return PTR_ERR(cow);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_nritems(cow, 0);
	btrfs_set_header_level(cow, 0);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(cow, BTRFS_CHUNK_TREE_OBJECTID);
	write_extent_buffer(cow, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);

	write_extent_buffer(cow, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(cow),
			    BTRFS_UUID_SIZE);

	root->node = cow;
	btrfs_mark_buffer_dirty(cow);

	return ret;
}

/*
 * Insert one device item per known device into @root.
 * Stops at and returns the first insertion error.
 */
static int __rebuild_device_items(struct btrfs_trans_handle *trans,
				  struct recover_control *rc,
				  struct btrfs_root *root)
{
	struct btrfs_device *dev;
	struct btrfs_key key;
	struct btrfs_dev_item dev_item_tmp;
	struct btrfs_dev_item *dev_item = &dev_item_tmp;
	int ret = 0;

	list_for_each_entry(dev, &rc->fs_devices->devices, dev_list) {
		key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
		key.type = BTRFS_DEV_ITEM_KEY;
		key.offset = dev->devid;

		btrfs_set_stack_device_generation(dev_item, 0);
		btrfs_set_stack_device_type(dev_item, dev->type);
		btrfs_set_stack_device_id(dev_item, dev->devid);
		btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
		btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
		btrfs_set_stack_device_io_align(dev_item, dev->io_align);
		btrfs_set_stack_device_io_width(dev_item, dev->io_width);
		btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
		memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
		memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);

		ret = btrfs_insert_item(trans, root, &key,
					dev_item, sizeof(*dev_item));
		/* Do not let a later success hide this failure. */
		if (ret)
			break;
	}

	return ret;
}

/* Insert the chunk item built from @chunk_rec into @chunk_root. */
static int __insert_chunk_item(struct btrfs_trans_handle *trans,
			       struct chunk_record *chunk_rec,
			       struct btrfs_root *chunk_root)
{
	struct btrfs_key key;
	struct btrfs_chunk *chunk = NULL;
	int ret = 0;

	chunk = create_chunk_item(chunk_rec);
	if (!chunk)
		return -ENOMEM;
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	key.offset = chunk_rec->offset;

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
				btrfs_chunk_item_size(chunk_rec->num_stripes));
	free(chunk);
	return ret;
}

/* Insert chunk items for all good and rebuild chunks into the chunk root. */
static int __rebuild_chunk_items(struct btrfs_trans_handle *trans,
				 struct recover_control *rc,
				 struct btrfs_root *root)
{
	struct btrfs_root *chunk_root;
	struct chunk_record *chunk_rec;
	int ret;

	chunk_root = root->fs_info->chunk_root;

	list_for_each_entry(chunk_rec, &rc->good_chunks, list) {
		ret = __insert_chunk_item(trans, chunk_rec, chunk_root);
		if (ret)
			return ret;
	}
	list_for_each_entry(chunk_rec, &rc->rebuild_chunks, list) {
		ret = __insert_chunk_item(trans, chunk_rec, chunk_root);
		if (ret)
			return ret;
	}
	return 0;
}

/* Recreate the chunk tree: new root leaf, device items, then chunk items. */
static int rebuild_chunk_tree(struct btrfs_trans_handle *trans,
			      struct recover_control *rc,
			      struct btrfs_root *root)
{
	int ret = 0;

	root = root->fs_info->chunk_root;

	ret = __rebuild_chunk_root(trans, rc, root);
	if (ret)
		return ret;

	ret = __rebuild_device_items(trans, rc, root);
	if (ret)
		return ret;

	ret = __rebuild_chunk_items(trans, rc, root);

	return ret;
}

/*
 * Reset the super block's sys_chunk_array and refill it with every good
 * SYSTEM chunk.  Returns 0, -ENOMEM or the error of
 * btrfs_add_system_chunk().
 */
static int rebuild_sys_array(struct recover_control *rc,
			     struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	struct chunk_record *chunk_rec;
	int ret = 0;
	u16 num_stripes;

	btrfs_set_super_sys_array_size(fs_info->super_copy, 0);

	list_for_each_entry(chunk_rec, &rc->good_chunks, list) {
		if (!(chunk_rec->type_flags & BTRFS_BLOCK_GROUP_SYSTEM))
			continue;

		num_stripes = chunk_rec->num_stripes;
		chunk = create_chunk_item(chunk_rec);
		if (!chunk) {
			ret = -ENOMEM;
			break;
		}

		key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
		key.type = BTRFS_CHUNK_ITEM_KEY;
		key.offset = chunk_rec->offset;

		ret = btrfs_add_system_chunk(fs_info, &key, chunk,
				btrfs_chunk_item_size(num_stripes));
		free(chunk);
		if (ret)
			break;
	}
	return ret;
}

/*
 * Sum up the bytes covered by extent items inside @chunk_rec, walking the
 * extent tree forward from the position @path already points to.
 *
 * METADATA_ITEM keys count as one node, EXTENT_ITEM keys count their key
 * offset.  On success (0) the total is stored in *@used; on a negative
 * error from btrfs_next_leaf() *@used is left untouched.
 */
static int calculate_bg_used(struct btrfs_root *extent_root,
			     struct chunk_record *chunk_rec,
			     struct btrfs_path *path,
			     u64 *used)
{
	struct extent_buffer *node;
	struct btrfs_key found_key;
	int slot;
	int ret = 0;
	u64 used_ret = 0;

	while (1) {
		node = path->nodes[0];
		slot = path->slots[0];

		/* Past the last item of this leaf: move to the next one. */
		if (slot >= btrfs_header_nritems(node)) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret > 0) {
				ret = 0;
				break;
			}
			if (ret < 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(node, &found_key, slot);
		if (found_key.objectid >= chunk_rec->offset + chunk_rec->length)
			break;

		if (found_key.type == BTRFS_METADATA_ITEM_KEY)
			used_ret += extent_root->fs_info->nodesize;
		else if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
			used_ret += found_key.offset;

		/* Advance the path itself; a local copy would be reloaded. */
		path->slots[0]++;
	}
	if (!ret)
		*used = used_ret;
	return ret;
}

/*
 * Insert a block group item for @chunk_rec with @used bytes accounted into
 * @extent_root.
 */
static int __insert_block_group(struct btrfs_trans_handle *trans,
				struct chunk_record *chunk_rec,
				struct btrfs_root *extent_root,
				u64 used)
{
	struct btrfs_block_group_item bg_item;
	struct btrfs_key key;
	int ret = 0;

	btrfs_set_block_group_used(&bg_item, used);
	/* Same objectid the chunk items in this file are keyed with. */
	btrfs_set_block_group_chunk_objectid(&bg_item,
					     BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_block_group_flags(&bg_item, chunk_rec->type_flags);
	key.objectid = chunk_rec->offset;
	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	key.offset = chunk_rec->length;

	ret = btrfs_insert_item(trans, extent_root, &key, &bg_item,
				sizeof(bg_item));
	return ret;
}

/*
 * Search through the extent tree to rebuild the 'used' member of the block
 * group.
 * However, since block group and extent item shares the extent tree,
 * the extent item may also missing.
 * In that case, we fill the 'used' with the length of the block group to
 * ensure no write into the block group.
 * Btrfsck will hate it but we will inform user to call '--init-extent-tree'
 * if possible, or just salvage as much data as possible from the fs.
 */
static int rebuild_block_group(struct btrfs_trans_handle *trans,
			       struct recover_control *rc,
			       struct btrfs_root *root)
{
	struct chunk_record *chunk_rec;
	struct btrfs_key search_key;
	struct btrfs_path path;
	u64 used = 0;
	int ret = 0;

	if (list_empty(&rc->rebuild_chunks))
		return 0;

	btrfs_init_path(&path);
	list_for_each_entry(chunk_rec, &rc->rebuild_chunks, list) {
		search_key.objectid = chunk_rec->offset;
		search_key.type = BTRFS_EXTENT_ITEM_KEY;
		search_key.offset = 0;
		ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
					&search_key, &path, 0, 0);
		if (ret < 0)
			goto out;
		ret = calculate_bg_used(root->fs_info->extent_root,
					chunk_rec, &path, &used);
		/*
		 * Extent tree is damaged, better to rebuild the whole extent
		 * tree. Currently, change the used to chunk's len to prevent
		 * write/block reserve happening in that block group.
		 */
		if (ret < 0) {
			fprintf(stderr,
				"Fail to search extent tree for block group: [%llu,%llu]\n",
				chunk_rec->offset,
				chunk_rec->offset + chunk_rec->length);
			fprintf(stderr,
				"Mark the block group full to prevent block rsv problems\n");
			used = chunk_rec->length;
		}
		btrfs_release_path(&path);
		ret = __insert_block_group(trans, chunk_rec,
					   root->fs_info->extent_root,
					   used);
		if (ret < 0)
			goto out;
	}
out:
	btrfs_release_path(&path);
	return ret;
}

/*
 * Open the filesystem using the chunk mapping recovered by the scan instead
 * of the on-disk chunk tree.
 * Returns the fs root on success or an ERR_PTR() on failure.
 */
static struct btrfs_root *
open_ctree_with_broken_chunk(struct recover_control *rc)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_super_block *disk_super;
	struct extent_buffer *eb;
	int ret;

	fs_info = btrfs_new_fs_info(1, BTRFS_SUPER_INFO_OFFSET);
	if (!fs_info) {
		fprintf(stderr, "Failed to allocate memory for fs_info\n");
		return ERR_PTR(-ENOMEM);
	}
	fs_info->is_chunk_recover = 1;

	fs_info->fs_devices = rc->fs_devices;
	ret = btrfs_open_devices(fs_info->fs_devices, O_RDWR);
	if (ret)
		goto out;

	disk_super = fs_info->super_copy;
	ret = btrfs_read_dev_super(fs_info->fs_devices->latest_bdev,
				   disk_super, fs_info->super_bytenr,
				   SBREAD_RECOVER);
	if (ret) {
		fprintf(stderr, "No valid btrfs found\n");
		goto out_devices;
	}

	memcpy(fs_info->fsid, &disk_super->fsid, BTRFS_FSID_SIZE);
	fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
	fs_info->nodesize = btrfs_super_nodesize(disk_super);
	fs_info->stripesize = btrfs_super_stripesize(disk_super);

	ret = btrfs_check_fs_compatibility(disk_super, OPEN_CTREE_WRITES);
	if (ret)
		goto out_devices;

	btrfs_setup_root(fs_info->chunk_root, fs_info,
			 BTRFS_CHUNK_TREE_OBJECTID);

	ret = build_device_maps_by_chunk_records(rc, fs_info->chunk_root);
	if (ret)
		goto out_cleanup;

	ret = btrfs_setup_all_roots(fs_info, 0, 0);
	if (ret)
		goto out_failed;

	eb = fs_info->tree_root->node;
	read_extent_buffer(eb, fs_info->chunk_tree_uuid,
			   btrfs_header_chunk_tree_uuid(eb),
			   BTRFS_UUID_SIZE);

	return fs_info->fs_root;
out_failed:
	btrfs_release_all_roots(fs_info);
out_cleanup:
	btrfs_cleanup_all_caches(fs_info);
out_devices:
	btrfs_close_devices(fs_info->fs_devices);
out:
	btrfs_free_fs_info(fs_info);
	return ERR_PTR(ret);
}

/*
 * Read the super block of @path, copy the filesystem geometry into @rc and
 * collect all devices of the filesystem.  Seed devices are rejected.
 * Returns 0 on success, non-zero on failure.
 */
static int recover_prepare(struct recover_control *rc, const char *path)
{
	int ret;
	int fd;
	struct btrfs_super_block *sb;
	char buf[BTRFS_SUPER_INFO_SIZE];
	struct btrfs_fs_devices *fs_devices;

	ret = 0;
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "open %s\n error.\n", path);
		return -1;
	}

	sb = (struct btrfs_super_block *)buf;
	ret = btrfs_read_dev_super(fd, sb, BTRFS_SUPER_INFO_OFFSET,
				   SBREAD_RECOVER);
	if (ret) {
		fprintf(stderr, "read super block error\n");
		goto out_close_fd;
	}

	rc->sectorsize = btrfs_super_sectorsize(sb);
	rc->nodesize = btrfs_super_nodesize(sb);
	rc->generation = btrfs_super_generation(sb);
	rc->chunk_root_generation = btrfs_super_chunk_root_generation(sb);
	rc->csum_size = btrfs_super_csum_size(sb);

	/* if seed, the result of scanning below will be partial */
	if (btrfs_super_flags(sb) & BTRFS_SUPER_FLAG_SEEDING) {
		fprintf(stderr, "this device is seed device\n");
		ret = -1;
		goto out_close_fd;
	}

	ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0,
				    SBREAD_RECOVER, 0);
	if (ret)
		goto out_close_fd;

	rc->fs_devices = fs_devices;

	if (rc->verbose)
		print_all_devices(&rc->fs_devices->devices);

out_close_fd:
	close(fd);
	return ret;
}

/*
 * Move every device extent of @orphan_devexts that belongs to the chunk at
 * @chunk_object to the tail of @ret_list.  Returns how many were moved.
 */
static int btrfs_get_device_extents(u64 chunk_object,
				    struct list_head *orphan_devexts,
				    struct list_head *ret_list)
{
	struct device_extent_record *devext;
	struct device_extent_record *next;
	int count = 0;

	list_for_each_entry_safe(devext, next, orphan_devexts, chunk_list) {
		if (devext->chunk_offset == chunk_object) {
			list_move_tail(&devext->chunk_list, ret_list);
			count++;
		}
	}
	return count;
}

/*
 * Number of stripes implied by the profile bits in @type: 2 for RAID1/DUP,
 * 1 for no profile bit, 0 for RAID0/10/5/6 where the flags alone do not
 * fix the count.
 */
static int calc_num_stripes(u64 type)
{
	if (type & (BTRFS_BLOCK_GROUP_RAID0 |
		    BTRFS_BLOCK_GROUP_RAID10 |
		    BTRFS_BLOCK_GROUP_RAID5 |
		    BTRFS_BLOCK_GROUP_RAID6))
		return 0;
	else if (type & (BTRFS_BLOCK_GROUP_RAID1 |
			 BTRFS_BLOCK_GROUP_DUP))
		return 2;
	else
		return 1;
}

/* Number of sub stripes for @type: 2 for RAID10, 1 otherwise. */
static inline int calc_sub_nstripes(u64 type)
{
	if (type & BTRFS_BLOCK_GROUP_RAID10)
		return 2;
	else
		return 1;
}

/*
 * Check that the @ndevexts device extents on @devexts are consistent with
 * block group @bg (stripe count and per-stripe length).
 * Returns 0 if they are, 1 otherwise.
 */
static int btrfs_verify_device_extents(struct block_group_record *bg,
				       struct list_head *devexts, int ndevexts)
{
	struct device_extent_record *devext;
	u64 stripe_length;
	int expected_num_stripes;

	expected_num_stripes = calc_num_stripes(bg->flags);
	if (expected_num_stripes && expected_num_stripes != ndevexts)
		return 1;

	if (check_num_stripes(bg->flags, ndevexts) < 0)
		return 1;

	stripe_length = calc_stripe_length(bg->flags, bg->offset, ndevexts);
	list_for_each_entry(devext, devexts, chunk_list) {
		if (devext->length != stripe_length)
			return 1;
	}
	return 0;
}

static int btrfs_rebuild_unordered_chunk_stripes(struct recover_control *rc,
						 struct chunk_record *chunk)
{
	struct device_extent_record *devext;
	struct btrfs_device *device;
	int i;

	devext = list_first_entry(&chunk->dextents, struct device_extent_record,
				  chunk_list);
	for (i = 0; i < chunk->num_stripes; i++) {
		chunk->stripes[i].devid = devext->objectid;
		chunk->stripes[i].offset = devext->offset;
		device = btrfs_find_device_by_devid(rc->fs_devices,
						    devext->objectid,
						    0);
		if (!device)
			return -ENOENT;
		BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,
						  devext->objectid,
						  1));
		memcpy(chunk->stripes[i].dev_uuid, device->uuid, B
TRFS_UUID_SIZE);devext=list_next_entry(devext,chunk_list);}return0;}staticintbtrfs_calc_stripe_index(structchunk_record*chunk,u64logical){u64offset=logical-chunk->offset;intstripe_nr;intnr_data_stripes;intindex;stripe_nr=offset/chunk->stripe_len;if(chunk->type_flags&BTRFS_BLOCK_GROUP_RAID0){index=stripe_nr%chunk->num_stripes;}elseif(chunk->type_flags&BTRFS_BLOCK_GROUP_RAID10){index=stripe_nr%(chunk->num_stripes/chunk->sub_stripes);index*=chunk->sub_stripes;}elseif(chunk->type_flags&BTRFS_BLOCK_GROUP_RAID5){nr_data_stripes=chunk->num_stripes-1;index=stripe_nr%nr_data_stripes;stripe_nr/=nr_data_stripes;index=(index+stripe_nr)%chunk->num_stripes;}elseif(chunk->type_flags&BTRFS_BLOCK_GROUP_RAID6){nr_data_stripes=chunk->num_stripes-2;index=stripe_nr%nr_data_stripes;stripe_nr/=nr_data_stripes;index=(index+stripe_nr)%chunk->num_stripes;}else{return-1;}returnindex;}/* calc the logical offset which is the start of the next stripe. */staticinlineu64btrfs_next_stripe_logical_offset(structchunk_record*chunk,u64logical){u64offset=logical-chunk->offset;offset/=chunk->stripe_len;offset*=chunk->stripe_len;offset+=chunk->stripe_len;returnoffset+chunk->offset;}staticintis_extent_record_in_device_extent(structextent_record*er,structdevice_extent_record*dext,int*mirror){inti;for(i=0;i<er->nmirrors;i++){if(er->devices[i]->devid==dext->objectid&&er->offsets[i]>=dext->offset&&er->offsets[i]<dext->offset+dext->length){*mirror=i;return1;}}return0;}staticintbtrfs_rebuild_ordered_meta_chunk_stripes(structrecover_control*rc,structchunk_record*chunk){u64start=chunk->offset;u64end=chunk->offset+chunk->length;structcache_extent*cache;structextent_record*er;structdevice_extent_record*devext;structdevice_extent_record*next;structbtrfs_device*device;LIST_HEAD(devexts);intindex;intmirror;intret;cache=lookup_cache_extent(&rc->eb_cache,start,chunk->length);if(!cache){/* No used space, we can reorder the stripes freely. 
*/ret=btrfs_rebuild_unordered_chunk_stripes(rc,chunk);returnret;}list_splice_init(&chunk->dextents,&devexts);again:er=container_of(cache,structextent_record,cache);index=btrfs_calc_stripe_index(chunk,er->cache.start);BUG_ON(index==-1);if(chunk->stripes[index].devid)gotonext;list_for_each_entry_safe(devext,next,&devexts,chunk_list){if(is_extent_record_in_device_extent(er,devext,&mirror)){chunk->stripes[index].devid=devext->objectid;chunk->stripes[index].offset=devext->offset;memcpy(chunk->stripes[index].dev_uuid,er->devices[mirror]->uuid,BTRFS_UUID_SIZE);index++;list_move(&devext->chunk_list,&chunk->dextents);}}next:start=btrfs_next_stripe_logical_offset(chunk,er->cache.start);if(start>=end)gotono_extent_record;cache=lookup_cache_extent(&rc->eb_cache,start,end-start);if(cache)gotoagain;no_extent_record:if(list_empty(&devexts))return0;if(chunk->type_flags&(BTRFS_BLOCK_GROUP_RAID5|BTRFS_BLOCK_GROUP_RAID6)){/* Fixme: try to recover the order by the parity block. */list_splice_tail(&devexts,&chunk->dextents);return-EINVAL;}/* There is no data on the lost stripes, we can reorder them freely. 
*/for(index=0;index<chunk->num_stripes;index++){if(chunk->stripes[index].devid)continue;devext=list_first_entry(&devexts,structdevice_extent_record,chunk_list);list_move(&devext->chunk_list,&chunk->dextents);chunk->stripes[index].devid=devext->objectid;chunk->stripes[index].offset=devext->offset;device=btrfs_find_device_by_devid(rc->fs_devices,devext->objectid,0);if(!device){list_splice_tail(&devexts,&chunk->dextents);return-EINVAL;}BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,devext->objectid,1));memcpy(chunk->stripes[index].dev_uuid,device->uuid,BTRFS_UUID_SIZE);}return0;}#define BTRFS_ORDERED_RAID (BTRFS_BLOCK_GROUP_RAID0 | \ BTRFS_BLOCK_GROUP_RAID10 | \ BTRFS_BLOCK_GROUP_RAID5 | \ BTRFS_BLOCK_GROUP_RAID6)staticintbtrfs_rebuild_chunk_stripes(structrecover_control*rc,structchunk_record*chunk){intret;/* * All the data in the system metadata chunk will be dropped, * so we need not guarantee that the data is right or not, that * is we can reorder the stripes in the system metadata chunk. */if((chunk->type_flags&BTRFS_BLOCK_GROUP_METADATA)&&(chunk->type_flags&BTRFS_ORDERED_RAID))ret=btrfs_rebuild_ordered_meta_chunk_stripes(rc,chunk);elseif((chunk->type_flags&BTRFS_BLOCK_GROUP_DATA)&&(chunk->type_flags&BTRFS_ORDERED_RAID))ret=1;/* Be handled after the fs is opened. 
*/elseret=btrfs_rebuild_unordered_chunk_stripes(rc,chunk);returnret;}staticintnext_csum(structbtrfs_root*root,structextent_buffer**leaf,structbtrfs_path*path,int*slot,u64*csum_offset,u32*tree_csum,u64end,structbtrfs_key*key){intret=0;structbtrfs_root*csum_root=root->fs_info->csum_root;structbtrfs_csum_item*csum_item;u32blocksize=root->fs_info->sectorsize;u16csum_size=btrfs_super_csum_size(root->fs_info->super_copy);intcsums_in_item=btrfs_item_size_nr(*leaf,*slot)/csum_size;if(*csum_offset>=csums_in_item){++(*slot);*csum_offset=0;if(*slot>=btrfs_header_nritems(*leaf)){ret=btrfs_next_leaf(csum_root,path);if(ret<0)return-1;elseif(ret>0)return1;*leaf=path->nodes[0];*slot=path->slots[0];}btrfs_item_key_to_cpu(*leaf,key,*slot);}if(key->offset+(*csum_offset)*blocksize>=end)return2;csum_item=btrfs_item_ptr(*leaf,*slot,structbtrfs_csum_item);csum_item=(structbtrfs_csum_item*)((unsignedchar*)csum_item+(*csum_offset)*csum_size);read_extent_buffer(*leaf,tree_csum,(unsignedlong)csum_item,csum_size);returnret;}staticu64calc_data_offset(structbtrfs_key*key,structchunk_record*chunk,u64dev_offset,u64csum_offset,u32blocksize){u64data_offset;intlogical_stripe_nr;intdev_stripe_nr;intnr_data_stripes;data_offset=key->offset+csum_offset*blocksize-chunk->offset;nr_data_stripes=chunk->num_stripes;if(chunk->type_flags&BTRFS_BLOCK_GROUP_RAID5)nr_data_stripes-=1;elseif(chunk->type_flags&BTRFS_BLOCK_GROUP_RAID6)nr_data_stripes-=2;logical_stripe_nr=data_offset/chunk->stripe_len;dev_stripe_nr=logical_stripe_nr/nr_data_stripes;data_offset-=logical_stripe_nr*chunk->stripe_len;data_offset+=dev_stripe_nr*chunk->stripe_len;returndev_offset+data_offset;}staticintcheck_one_csum(intfd,u64start,u32len,u32tree_csum){char*data;intret=0;u32csum_result=~(u32)0;data=malloc(len);if(!data)return-1;ret=pread64(fd,data,len,start);if(ret<0||ret!=len){ret=-1;gotoout;}ret=0;csum_result=btrfs_csum_data(data,csum_result,len);btrfs_csum_final(csum_result,(u8*)&csum_result);if(csum_result!=tree_csum)ret=1;out:free(data);
returnret;}staticu64item_end_offset(structbtrfs_root*root,structbtrfs_key*key,structextent_buffer*leaf,intslot){u32blocksize=root->fs_info->sectorsize;u16csum_size=btrfs_super_csum_size(root->fs_info->super_copy);u64offset=btrfs_item_size_nr(leaf,slot);offset/=csum_size;offset*=blocksize;offset+=key->offset;returnoffset;}staticintinsert_stripe(structlist_head*devexts,structrecover_control*rc,structchunk_record*chunk,intindex){structdevice_extent_record*devext;structbtrfs_device*dev;devext=list_entry(devexts->next,structdevice_extent_record,chunk_list);dev=btrfs_find_device_by_devid(rc->fs_devices,devext->objectid,0);if(!dev)return-ENOENT;if(btrfs_find_device_by_devid(rc->fs_devices,devext->objectid,1)){error("unexpected: found another device with id %llu",(unsignedlonglong)devext->objectid);return-EINVAL;}chunk->stripes[index].devid=devext->objectid;chunk->stripes[index].offset=devext->offset;memcpy(chunk->stripes[index].dev_uuid,dev->uuid,BTRFS_UUID_SIZE);list_move(&devext->chunk_list,&chunk->dextents);return0;}staticinlineintcount_devext_records(structlist_head*record_list){intnum_of_records=0;structdevice_extent_record*devext;list_for_each_entry(devext,record_list,chunk_list)num_of_records++;returnnum_of_records;}staticintfill_chunk_up(structchunk_record*chunk,structlist_head*devexts,structrecover_control*rc){intret=0;inti;for(i=0;i<chunk->num_stripes;i++){if(!chunk->stripes[i].devid){ret=insert_stripe(devexts,rc,chunk,i);if(ret)break;}}returnret;}#define EQUAL_STRIPE (1 << 
0)staticintrebuild_raid_data_chunk_stripes(structrecover_control*rc,structbtrfs_root*root,structchunk_record*chunk,u8*flags){inti;intret=0;intslot;structbtrfs_pathpath;structbtrfs_keyprev_key;structbtrfs_keykey;structbtrfs_root*csum_root;structextent_buffer*leaf;structdevice_extent_record*devext;structdevice_extent_record*next;structbtrfs_device*dev;u64start=chunk->offset;u64end=start+chunk->stripe_len;u64chunk_end=chunk->offset+chunk->length;u64csum_offset=0;u64data_offset;u32blocksize=root->fs_info->sectorsize;u32tree_csum;intindex=0;intnum_unordered=0;LIST_HEAD(unordered);LIST_HEAD(candidates);csum_root=root->fs_info->csum_root;btrfs_init_path(&path);list_splice_init(&chunk->dextents,&candidates);again:if(list_is_last(candidates.next,&candidates))gotoout;key.objectid=BTRFS_EXTENT_CSUM_OBJECTID;key.type=BTRFS_EXTENT_CSUM_KEY;key.offset=start;ret=btrfs_search_slot(NULL,csum_root,&key,&path,0,0);if(ret<0){fprintf(stderr,"Search csum failed(%d)\n",ret);gotofail_out;}leaf=path.nodes[0];slot=path.slots[0];if(ret>0){if(slot>=btrfs_header_nritems(leaf)){ret=btrfs_next_leaf(csum_root,&path);if(ret<0){fprintf(stderr,"Walk tree 
failed(%d)\n",ret);gotofail_out;}elseif(ret>0){slot=btrfs_header_nritems(leaf)-1;btrfs_item_key_to_cpu(leaf,&key,slot);if(item_end_offset(root,&key,leaf,slot)>start){csum_offset=start-key.offset;csum_offset/=blocksize;gotonext_csum;}gotonext_stripe;}leaf=path.nodes[0];slot=path.slots[0];}btrfs_item_key_to_cpu(leaf,&key,slot);ret=btrfs_previous_item(csum_root,&path,0,BTRFS_EXTENT_CSUM_KEY);if(ret<0)gotofail_out;elseif(ret>0){if(key.offset>=end)gotonext_stripe;elsegotonext_csum;}leaf=path.nodes[0];slot=path.slots[0];btrfs_item_key_to_cpu(leaf,&prev_key,slot);if(item_end_offset(root,&prev_key,leaf,slot)>start){csum_offset=start-prev_key.offset;csum_offset/=blocksize;btrfs_item_key_to_cpu(leaf,&key,slot);}else{if(key.offset>=end)gotonext_stripe;}if(key.offset+csum_offset*blocksize>chunk_end)gotoout;}next_csum:ret=next_csum(root,&leaf,&path,&slot,&csum_offset,&tree_csum,end,&key);if(ret<0){fprintf(stderr,"Fetch csum failed\n");gotofail_out;}elseif(ret==1){if(!(*flags&EQUAL_STRIPE))*flags|=EQUAL_STRIPE;gotoout;}elseif(ret==2)gotonext_stripe;list_for_each_entry_safe(devext,next,&candidates,chunk_list){data_offset=calc_data_offset(&key,chunk,devext->offset,csum_offset,blocksize);dev=btrfs_find_device_by_devid(rc->fs_devices,devext->objectid,0);if(!dev){ret=1;gotofail_out;}BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,devext->objectid,1));ret=check_one_csum(dev->fd,data_offset,blocksize,tree_csum);if(ret<0)gotofail_out;elseif(ret>0)list_move(&devext->chunk_list,&unordered);}if(list_empty(&candidates)){num_unordered=count_devext_records(&unordered);if(chunk->type_flags&BTRFS_BLOCK_GROUP_RAID6&&num_unordered==2){btrfs_release_path(&path);ret=fill_chunk_up(chunk,&unordered,rc);returnret;}gotonext_stripe;}if(list_is_last(candidates.next,&candidates)){index=btrfs_calc_stripe_index(chunk,key.offset+csum_offset*blocksize);BUG_ON(index==-1);if(chunk->stripes[index].devid)gotonext_stripe;ret=insert_stripe(&candidates,rc,chunk,index);if(ret)gotofail_out;}else{csum_offset++;gotonext
_csum;}next_stripe:start=btrfs_next_stripe_logical_offset(chunk,start);end=min(start+chunk->stripe_len,chunk_end);list_splice_init(&unordered,&candidates);btrfs_release_path(&path);csum_offset=0;if(end<chunk_end)gotoagain;out:ret=0;list_splice_init(&candidates,&unordered);num_unordered=count_devext_records(&unordered);if(num_unordered==1){for(i=0;i<chunk->num_stripes;i++){if(!chunk->stripes[i].devid){index=i;break;}}ret=insert_stripe(&unordered,rc,chunk,index);if(ret)gotofail_out;}else{if((num_unordered==2&&chunk->type_flags&BTRFS_BLOCK_GROUP_RAID5)||(num_unordered==3&&chunk->type_flags&BTRFS_BLOCK_GROUP_RAID6)){ret=fill_chunk_up(chunk,&unordered,rc);}}fail_out:ret=!!ret||(list_empty(&unordered)?0:1);list_splice_init(&candidates,&chunk->dextents);list_splice_init(&unordered,&chunk->dextents);btrfs_release_path(&path);returnret;}staticintbtrfs_rebuild_ordered_data_chunk_stripes(structrecover_control*rc,structbtrfs_root*root){structchunk_record*chunk;structchunk_record*next;intret=0;interr;u8flags;list_for_each_entry_safe(chunk,next,&rc->unrepaired_chunks,list){if((chunk->type_flags&BTRFS_BLOCK_GROUP_DATA)&&(chunk->type_flags&BTRFS_ORDERED_RAID)){flags=0;err=rebuild_raid_data_chunk_stripes(rc,root,chunk,&flags);if(err){list_move(&chunk->list,&rc->bad_chunks);if(flags&EQUAL_STRIPE)fprintf(stderr,"Failure: too many equal stripes in chunk[%llu %llu]\n",chunk->offset,chunk->length);if(!ret)ret=err;}elselist_move(&chunk->list,&rc->good_chunks);}}returnret;}staticintbtrfs_recover_chunks(structrecover_control*rc){structchunk_record*chunk;structblock_group_record*bg;structblock_group_record*next;LIST_HEAD(new_chunks);LIST_HEAD(devexts);intnstripes;intret;/* create the chunk by block group 
*/list_for_each_entry_safe(bg,next,&rc->bg.block_groups,list){nstripes=btrfs_get_device_extents(bg->objectid,&rc->devext.no_chunk_orphans,&devexts);chunk=calloc(1,btrfs_chunk_record_size(nstripes));if(!chunk)return-ENOMEM;INIT_LIST_HEAD(&chunk->dextents);chunk->bg_rec=bg;chunk->cache.start=bg->objectid;chunk->cache.size=bg->offset;chunk->objectid=BTRFS_FIRST_CHUNK_TREE_OBJECTID;chunk->type=BTRFS_CHUNK_ITEM_KEY;chunk->offset=bg->objectid;chunk->generation=bg->generation;chunk->length=bg->offset;chunk->owner=BTRFS_CHUNK_TREE_OBJECTID;chunk->stripe_len=BTRFS_STRIPE_LEN;chunk->type_flags=bg->flags;chunk->io_width=BTRFS_STRIPE_LEN;chunk->io_align=BTRFS_STRIPE_LEN;chunk->sector_size=rc->sectorsize;chunk->sub_stripes=calc_sub_nstripes(bg->flags);ret=insert_cache_extent(&rc->chunk,&chunk->cache);if(ret==-EEXIST){error("duplicate entry in cache start %llu size %llu",(unsignedlonglong)chunk->cache.start,(unsignedlonglong)chunk->cache.size);free(chunk);returnret;}BUG_ON(ret);list_del_init(&bg->list);if(!nstripes){list_add_tail(&chunk->list,&rc->bad_chunks);continue;}list_splice_init(&devexts,&chunk->dextents);ret=btrfs_verify_device_extents(bg,&devexts,nstripes);if(ret){list_add_tail(&chunk->list,&rc->bad_chunks);continue;}chunk->num_stripes=nstripes;ret=btrfs_rebuild_chunk_stripes(rc,chunk);if(ret>0)list_add_tail(&chunk->list,&rc->unrepaired_chunks);elseif(ret<0)list_add_tail(&chunk->list,&rc->bad_chunks);elselist_add_tail(&chunk->list,&rc->good_chunks);}/* * Don't worry about the lost orphan device extents, they don't * have its chunk and block group, they must be the old ones that * we have dropped. 
*/return0;}staticinlineintis_chunk_overlap(structchunk_record*chunk1,structchunk_record*chunk2){if(chunk1->offset>=chunk2->offset+chunk2->length||chunk1->offset+chunk1->length<=chunk2->offset)return0;return1;}/* Move invalid(overlap with good chunks) rebuild chunks to bad chunk list */staticvoidvalidate_rebuild_chunks(structrecover_control*rc){structchunk_record*good;structchunk_record*rebuild;structchunk_record*tmp;list_for_each_entry_safe(rebuild,tmp,&rc->rebuild_chunks,list){list_for_each_entry(good,&rc->good_chunks,list){if(is_chunk_overlap(rebuild,good)){list_move_tail(&rebuild->list,&rc->bad_chunks);break;}}}}/* * Return 0 when successful, < 0 on error and > 0 if aborted by user */intbtrfs_recover_chunk_tree(constchar*path,intverbose,intyes){intret=0;structbtrfs_root*root=NULL;structbtrfs_trans_handle*trans;structrecover_controlrc;init_recover_control(&rc,verbose,yes);ret=recover_prepare(&rc,path);if(ret){fprintf(stderr,"recover prepare error\n");returnret;}ret=scan_devices(&rc);if(ret){fprintf(stderr,"scan chunk headers error\n");gotofail_rc;}if(cache_tree_empty(&rc.chunk)&&cache_tree_empty(&rc.bg.tree)&&cache_tree_empty(&rc.devext.tree)){fprintf(stderr,"no recoverable chunk\n");gotofail_rc;}print_scan_result(&rc);ret=check_chunks(&rc.chunk,&rc.bg,&rc.devext,&rc.good_chunks,&rc.bad_chunks,&rc.rebuild_chunks,1);if(ret){if(!list_empty(&rc.bg.block_groups)||!list_empty(&rc.devext.no_chunk_orphans)){ret=btrfs_recover_chunks(&rc);if(ret)gotofail_rc;}}else{print_check_result(&rc);printf("Check chunks successfully with no orphans\n");gotofail_rc;}validate_rebuild_chunks(&rc);print_check_result(&rc);root=open_ctree_with_broken_chunk(&rc);if(IS_ERR(root)){fprintf(stderr,"open with broken chunk error\n");ret=PTR_ERR(root);gotofail_rc;}ret=check_all_chunks_by_metadata(&rc,root);if(ret){fprintf(stderr,"The chunks in memory can not match the metadata of the fs. 
Repair failed.\n");gotofail_close_ctree;}ret=btrfs_rebuild_ordered_data_chunk_stripes(&rc,root);if(ret){fprintf(stderr,"Failed to rebuild ordered chunk stripes.\n");gotofail_close_ctree;}if(!rc.yes){ret=ask_user("We are going to rebuild the chunk tree on disk, it might destroy the old metadata on the disk, Are you sure?");if(!ret){ret=1;gotofail_close_ctree;}}trans=btrfs_start_transaction(root,1);BUG_ON(IS_ERR(trans));ret=remove_chunk_extent_item(trans,&rc,root);BUG_ON(ret);ret=rebuild_chunk_tree(trans,&rc,root);BUG_ON(ret);ret=rebuild_sys_array(&rc,root);BUG_ON(ret);ret=rebuild_block_group(trans,&rc,root);if(ret){printf("Fail to rebuild block groups.\n");printf("Recommend to run 'btrfs check --init-extent-tree <dev>' after recovery\n");}btrfs_commit_transaction(trans,root);fail_close_ctree:close_ctree(root);fail_rc:free_recover_control(&rc);returnret;}