# Un-taint the path, and remove IFS, CDPATH, ENV and BASH_ENV from the
# environment before any external command is run.
$ENV{'PATH'} = '/bin:/sbin:/usr/bin:/usr/local/bin';
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

# Turn off line buffering on output
$| = 1;

#
# We don't want to run this script unless it's the real version.
#
if ($EUID != 0) {
    die("Must be root! Maybe its a development version?");
}

#
# Parse command arguments. Once we return from getopts, all that should be
# left are the required arguments.
#
%options = ();
if (! getopts($optlist, \%options)) {
    usage();
}
if (defined($options{"d"})) {

# Look up every node_id in the reserved table for $pid/$eid and add it to
# @nodes. NOTE(review): $pid and $eid are interpolated directly into the
# SQL text; confirm they are validated where they are parsed (not visible
# in this part of the file).
    my $query_result =
        DBQueryFatal("select node_id from reserved where " .
                     "pid='$pid' and eid='$eid'");

    if ($query_result->numrows == 0) {
        print STDOUT "There are no nodes reserved in pid/eid $pid/$eid\n";
        usage();
    }
    while (@row = $query_result->fetchrow_array()) {
        push(@nodes, $row[0]);
    }
}
else {
    # Otherwise the nodes are named on the command line; at least one
    # is required.
    if (@ARGV == 0) {
        usage();
    }

    # Untaint the nodes. Only word characters, '-' and '@' are accepted.
    foreach my $node (@ARGV) {
        if ($node =~ /^([-\@\w]+)$/) {
            $node = $1;
        }
        else {
            die("Bad node name: $node.");
        }
        push(@nodes, $node);
    }

#
# Another shark hack. Well, perhaps not. We really don't want 50 nodes
# all rebooting at the same time, PCs *or* sharks. Let's order them
# so that the shelves are grouped together at least, and issue the reboots
# in batches.
#

my @sortednodes = sort(@nodes);

# Work through the sorted node list in batches of at most 8 nodes.
while (@sortednodes) {
    my @batch     = ();
    my $i         = 0;
    my $lastshelf = 0;

    while ($i < 8 && @sortednodes > 0) {
        my $node = shift(@sortednodes);
        my $shelf;
        my $unit;

        #
        # The point of this silliness is to stop at each shelf transition:
        # a node on a different shelf is pushed back and starts the next
        # batch.
        #
        if (IsShelved($node, \$shelf, \$unit)) {
            if ($lastshelf && $lastshelf ne $shelf) {
                unshift(@sortednodes, $node);
                last;
            }
            $lastshelf = $shelf;
        }
        push(@batch, $node);
        $i++;
    }

    if ($force) {
        #
        # In force mode, call the power program for the whole batch, and
        # continue on. We don't wait for them to go down or reboot.
        #
        system("$power cycle @batch");
        if ($?) {
            # $? >> 8 is 0 when the command died from a signal; never
            # exit with a success status after a failed power cycle.
            exit(($? >> 8) || 1);
        }
    }
    else {
        #
        # Fire off a reboot process so that we can overlap them all.
        # We need the pid so we can wait for them all before proceeding.
        #
        foreach my $node (@batch) {
            $mypid = RebootNode($node);
            $pids{$node} = $mypid;
        }
    }

    #
    # If there are more nodes to go, then let's pause a bit so that we
    # do not get a flood of machines coming up all at the same exact
    # moment.
    #
    if (@sortednodes) {
        print STDOUT "Pausing to give some nodes time to reboot ...\n";

        if ($lastshelf) {
            sleep(15);

# Wait for the child recorded in %pids for $node and count failures.
    my $mypid = $pids{$node};

    waitpid($mypid, 0);
    if ($?) {
        $failed++;
        print STDERR "Reboot of node $node failed!\n";
    }
    else {
        print STDOUT "$node rebooting ...\n";
    }
}

if ($debug && $failed) {
    print STDERR "$failed nodes could not be rebooted\n";
}
# The exit status is the number of nodes whose reboot failed.
exit $failed;

#
# Reboot a node in a child process. Return the pid to the parent so
# that it can wait on all the children later.
#
sub RebootNode {
    local($pc) = @_;
    local($status, $syspid, $mypid);

    print STDOUT "Rebooting $pc ...\n";

    # The parent returns the child's pid immediately; the child carries on.
    # NOTE(review): a failed fork() returns undef, which also falls through
    # into the child path below -- confirm whether that needs handling.
    $mypid = fork();
    if ($mypid) {
        return $mypid;
    }

    #
    # See if the machine is pingable. If it's not pingable, then we just
    # power cycle the machine rather than wait for ssh to time out.
    #
    # ping returns 0 if any packets make it through.
    #

# Power cycle the node and finish this process: exit status 0 when
# PowerCycle() returns 0, exit(-1) otherwise.
        print STDERR "$pc appears to be dead. Power cycling ...\n" if $debug;
        if (PowerCycle($pc)) {
            exit(-1);
        }
        exit(0);
    }

    #
    # Machine is pingable at least. Try to reboot it gracefully,
    # or power cycle anyway if that does not work.
    #

# NOTE(review): "$ipod$pc" and "$ssh$pc /sbin/reboot" below have no space
# between the command variable and the node name; confirm against the
# definitions of $ipod and $ssh (not visible here) that this is intended.
            system("$ipod$pc");
        }
    }
    else {
        exec("$ssh$pc /sbin/reboot");
        exit(0);
    }

    #
    # Okay, before we power cycle let's really make sure. We wait a while
    # for it to stop responding to pings, and if it never goes silent,
    # punch the power button.
    #
    if (WaitTillDead($pc) == 0) {
        exit(0);
    }
    print STDERR "$pc is still running. Power cycling ...\n" if $debug;
    if (PowerCycle($pc)) {
        exit(-1);
    }
    exit(0);
}

#
# Power cycle a PC using the testbed power program.
#
# Takes the node name. Returns 0 when the power command succeeded and a
# non-zero value when it failed.
#
sub PowerCycle {
    local($pc) = @_;

    system("$power cycle $pc");

    # $? >> 8 is the command's exit code, but it is 0 when the command was
    # killed by a signal; report that case as a failure too.
    return ($? >> 8) || ($? ? 1 : 0);
}

#
# Wait until a machine stops returning ping packets.
#
sub WaitTillDead {
    local($pc) = @_;
    local($status);

    print STDERR "Waiting for $pc to die off\n" if $debug;

    #
    # Sigh, a long ping results in the script waiting until all the
    # packets are sent from all the pings, before it will exit. So,
    # loop doing a bunch of shorter pings.
    #