(***********************************************************************)
(* clean_keydb.ml - Executable: Cleans up various problems that occur  *)
(*                  in key databases                                   *)
(*                                                                     *)
(* Currently, this includes:                                           *)
(*   - Merging all mergeable keys                                      *)
(*   - Eliminating keys with unparseable packet sequences              *)
(*   - Eliminating duplicates                                          *)
(*     (Note, this doesn't get rid of ALL duplicates, for instance, if *)
(*     the same signature is used to sign two different keys, it is    *)
(*     not removed.  Removal is only done if it leaves a reasonable    *)
(*     packet structure in place.)                                     *)
(*                                                                     *)
(* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, *)
(*               2011, 2012, 2013  Yaron Minsky and Contributors       *)
(*                                                                     *)
(* This file is part of SKS.  SKS is free software; you can            *)
(* redistribute it and/or modify it under the terms of the GNU General *)
(* Public License as published by the Free Software Foundation; either *)
(* version 2 of the License, or (at your option) any later version.    *)
(*                                                                     *)
(* This program is distributed in the hope that it will be useful, but *)
(* WITHOUT ANY WARRANTY; without even the implied warranty of          *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   *)
(* General Public License for more details.                            *)
(*                                                                     *)
(* You should have received a copy of the GNU General Public License   *)
(* along with this program; if not, write to the Free Software         *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA or see <http://www.gnu.org/licenses/>.
 *)
(***********************************************************************)

module F (M : sig end) =
struct
  open StdLabels
  open MoreLabels
  open Printf
  open Arg
  open Common
  module Set = PSet.Set
  module Map = PMap.Map
  module Unix = UnixLabels
  open Packet
  open Bdb

  (* Database configuration, taken from the runtime [Settings] module.
     [dbdir]/[dumpdir] are lazy values forced here, after command-line
     parsing has populated them. *)
  let settings = {
    Keydb.withtxn = !Settings.transactions;
    Keydb.cache_bytes = !Settings.cache_bytes;
    Keydb.pagesize = !Settings.pagesize;
    Keydb.keyid_pagesize = !Settings.keyid_pagesize;
    Keydb.meta_pagesize = !Settings.meta_pagesize;
    Keydb.subkeyid_pagesize = !Settings.subkeyid_pagesize;
    Keydb.time_pagesize = !Settings.time_pagesize;
    Keydb.tqueue_pagesize = !Settings.tqueue_pagesize;
    Keydb.word_pagesize = !Settings.word_pagesize;
    Keydb.dbdir = Lazy.force Settings.dbdir;
    Keydb.dumpdir = Lazy.force Settings.dumpdir;
  }

  (** we need full keydb access because we're playing directly with
      databases and cursors and such *)
  module Keydb = Keydb.Unsafe

  (* Infix map lookup: [map |= key] returns the data bound to [key].
     @raise Not_found if [key] is unbound. *)
  let ( |= ) map key = Map.find key map

  (* Infix map extension: [map |< (key, data)] adds the binding. *)
  let ( |< ) map (key, data) = Map.add ~key ~data map

  (* Shared progress counter; each pass below resets it to 0. *)
  let ctr = ref 0

  (* Bump the progress counter and log every 10000 steps. *)
  let tick () =
    incr ctr;
    if !ctr mod 10000 = 0 then
      perror "%d thousand steps processed" (!ctr / 1000)

  (* A pending database fix-up: either delete an unsalvageable key, or
     swap a key for its canonicalized replacement. *)
  type action = Delete of key | Swap of (key * key)

  (* Apply a single fix-up [action] directly to the key database. *)
  let do_action action = match action with
    | Swap (key1, key2) -> Keydb.swap_keys key1 key2
    | Delete key -> Keydb.delete_key key

  (* Apply [f] to the contents of [opt], if any. *)
  let do_opt f opt = match opt with
    | None -> ()
    | Some x -> f x

  (** Canonicalize a key if it is required.
      This assumes that the given key is actually in the database *)
  let canonicalize_key key =
    try
      let ckey = Fixkey.canonicalize key in
      if KeyHash.hash ckey <> KeyHash.hash key then begin
        perror "Swap found: %s -> %s"
          (KeyHash.hexify (KeyHash.hash key))
          (KeyHash.hexify (KeyHash.hash ckey));
        Some (Swap (key, ckey))
      end
      else None
    with
      Fixkey.Bad_key ->
        (* Unparseable packet sequence: the key cannot be repaired. *)
        perror "Key to be deleted: %s" (KeyHash.hexify (KeyHash.hash key));
        Some (Delete key)

  (* Number of actions to batch before flushing them to the database;
     defaults to 10000 when the -n setting is unset (0). *)
  let at_once = match !Settings.n with
      0 -> 10000
    | n -> n

  (* Canonicalize keys stored indirectly (as offsets into the on-disk
     keydump files), by re-reading every keydump from the start and
     queueing Delete/Swap actions, applied in batches of [at_once]. *)
  let canonicalize_indirect () =
    ctr := 0;
    perror "Starting indirect canonicalization";
    let dbs = Keydb.get_dbs () in
    let filearray = dbs.Keydb.dump.Keydb.filearray in
    let actions = ref [] in
    let num_actions = ref 0 in

    (* Keep only actions whose target key is still in the database,
       keyed by the hash of the key each action would remove. *)
    let filter_actions actions =
      let actions = List.map actions ~f:(function
          | Delete key as action -> (KeyHash.hash key, action)
          | Swap (key1, key2) as action -> (KeyHash.hash key1, action))
      in
      let actions = List.sort ~cmp:compare actions in
      let actions =
        List.filter actions ~f:(fun (hash, action) -> Keydb.has_hash hash)
      in
      List.map ~f:(fun (hash, action) -> action) actions
    in

    (* Convert the queued actions into metadata updates, apply them in
       one batch, checkpoint, and reset the queue. *)
    let run_stored_actions () =
      let filt_actions = filter_actions !actions in
      perror "doing %d out of %d update actions"
        (List.length filt_actions) (List.length !actions);
      let dbactions =
        List.fold_left ~init:[] filt_actions
          ~f:(fun list action -> match action with
                Delete key ->
                  (Keydb.key_to_metadata key, Keydb.DeleteKey) :: list
              | Swap (key1, key2) ->
                  (* a swap is a delete of the old key plus an add of
                     the canonicalized replacement *)
                  (Keydb.key_to_metadata key1, Keydb.DeleteKey)
                  :: (Keydb.key_to_metadata key2, Keydb.AddKey) :: list)
      in
      Keydb.apply_md_updates (Array.of_list dbactions);
      Keydb.unconditional_checkpoint ();
      actions := [];
      num_actions := 0
    in

    (* Queue one action, flushing once [at_once] have accumulated. *)
    let add_action action =
      actions := action :: !actions;
      incr num_actions;
      if !num_actions >= at_once then run_stored_actions ()
    in

    (* Scan every keydump file from offset 0; [get] raises Not_found
       at end-of-file, terminating the inner loop. *)
    Array.iteri filearray ~f:(fun i inchan ->
      perror "Starting keydump %d" i;
      seek_in inchan 0;
      let cin = new Channel.sys_in_channel inchan in
      let get = Key.get_of_channel cin in
      try
        while true do
          tick ();
          let key = get () in
          let action = canonicalize_key key in
          do_opt add_action action
        done
      with Not_found -> ());

    (* flush whatever remains in the final partial batch *)
    run_stored_actions ();
    perror "Indirect canonicalization complete"

  (** iterate through the entire database, replacing all non-canonical
      keys with canonicalized versions.
      Delete all non-canonicalizable keys.  Only work on keys stored
      directly in the database.  Keys stored indirectly will be fixed
      by scanning the initial keydump.  Note that this is not nearly
      so highly-optimized as canonicalize_indirect.  However, for most
      keyservers, most of the keys will be in the indirect keydump
      anyway. *)
  let canonicalize_direct () =
    ctr := 0;
    perror "Starting direct canonicalization";
    let clean ~hash ~keystr =
      let skey = Keydb.skey_of_string keystr in
      if not (Keydb.skey_is_offset skey) then
        let key = Keydb.key_of_skey skey in
        tick ();
        (* ignore offsets, they're handled elsewhere *)
        do_opt do_action (canonicalize_key key)
    in
    Keydb.raw_iter clean;
    perror "Direct canonicalization complete"

  (* Full canonicalization pass: indirect (keydump) keys first, then
     keys stored directly in the database. *)
  let canonicalize () =
    canonicalize_indirect ();
    canonicalize_direct ()

  (***************************************************************)
  (***************************************************************)
  (***************************************************************)

  (** internal function: retrieves list of (key,data) duplicates for a
      given cursor *)
  let rec get_dups_rec cursor accum =
    try
      (* NEXT_DUP steps to the next entry with the same key; raises
         Not_found when the duplicates are exhausted. *)
      let (key, data) = Cursor.get cursor Cursor.NEXT_DUP [] in
      get_dups_rec cursor ((key, data) :: accum)
    with Not_found -> accum

  (** returns pair of key and duplicate data for the given cursor *)
  let get_dups cursor =
    let pairs = get_dups_rec cursor [] in
    match pairs with
      [] -> failwith "get_dups retrieved empty list"
    | (key, data) :: tail ->
        (* sanity-check that every entry really shares the same key *)
        let dtail = List.map tail ~f:(fun (tkey, tdata) ->
          if tkey <> key then failwith "get_dups retrieved non-duplicate"
          else tdata)
        in
        (key, data :: dtail)

  (** checks if a sorted list has duplicates *)
  let rec has_dups list = match list with
      [] -> false
    | [hd] -> false
    | hd1 :: hd2 :: tl ->
        if hd1 = hd2 then true
        else has_dups (hd2 :: tl)

  (** merges keys given the key hashes.
      The [keyid] argument is there just to make logging more
      understandable *)
  let merge_from_hashes keyid hashes =
    (* Sort hashes and remove duplicates, if any *)
    let hashes = List.sort ~cmp:compare hashes in
    let hashes =
      if has_dups hashes then (
        perror "Duplicates found in hash list";
        MList.dedup hashes)
      else hashes
    in
    (** fetches a key from its hash *)
    let key_from_hash hash =
      try
        let key = Keydb.get_by_hash hash in
        let newhash = KeyHash.hash key in
        (* log (but tolerate) a stored hash that no longer matches *)
        if newhash <> hash then
          perror "Key hashes do not match up:\n\trequested: %s\n\tfound: %s"
            (KeyHash.hexify hash) (KeyHash.hexify newhash);
        Some key
      with Not_found ->
        perror "Database corruption: Key matched up to keyid not found in database:\n\tkeyid: %s\n\thash: %s"
          (Fingerprint.keyid_to_string keyid) (KeyHash.hexify hash);
        None
    in
    let keys = strip_opt (List.map ~f:key_from_hash hashes) in
    (* compute the list of replacements and apply them *)
    let replacements = Fixkey.compute_merge_replacements keys in
    if List.length replacements > 0 then
      perror "%d replacements found" (List.length replacements);
    List.iter replacements ~f:(fun (delete_list, newkey) ->
      perror "replacing %d keys with single merged key"
        (List.length delete_list);
      List.iter delete_list ~f:(fun key ->
        perror "removing: %s" (KeyHash.hexify (KeyHash.hash key)));
      perror "adding: %s" (KeyHash.hexify (KeyHash.hash newkey));
      Keydb.replace delete_list newkey;
      perror "Transaction complete")

  (** find all sets of key with the same keyid and merge them if
      possible *)
  let merge () =
    ctr := 0;
    perror "Starting key merge";
    let dbs = Keydb.get_dbs () in
    let c = Cursor.create dbs.Keydb.keyid in
    let (first_keyid, first_hash) = Cursor.get c Cursor.FIRST [] in
    let finished = ref false
    and keyid = ref first_keyid
    and hash = ref first_hash in
    (* walk the keyid index; wherever one keyid maps to several hashes,
       collect them and attempt a merge *)
    while not !finished do
      tick ();
      if Cursor.count c > 1 then (
        let (dup_keyid, hashes) = get_dups c in
        if dup_keyid <> !keyid then
          failwith "Failure retrieving duplicates";
        (* include the hash the cursor was parked on *)
        let hashes = !hash :: hashes in
        perror "%s" ("Multiple keys found with same ID.  "
                     ^ "merge_from_hashes called");
        List.iter hashes ~f:(fun hash ->
          perror "Hash: %s" (KeyHash.hexify hash));
        merge_from_hashes !keyid hashes);
      try
        let (new_keyid, new_hash) = Cursor.get c Cursor.NEXT [] in
        keyid := new_keyid;
        hash := new_hash
      with Not_found -> finished := true
    done;
    perror "Completed key merge"

  (** Run filters that are not already contained in [applied_filters] *)
  let run applied_filters =
    (* only do canonicalize if it's necessary *)
    if not (List.mem "yminsky.dedup" applied_filters) then (
      perror "Deduping keys in database";
      canonicalize ();
      Keydb.set_meta ~key:"filters" ~data:"yminsky.dedup";
      Keydb.unconditional_checkpoint ();
    )
    else perror "Database already deduped";
    (* note: if dedup was done, merge should be done again *)
    if not (List.mem "yminsky.dedup" applied_filters)
       || not (List.mem "yminsky.merge" applied_filters)
    then (
      perror "Merging keys in database";
      merge ();
      Keydb.set_meta ~key:"filters" ~data:"yminsky.dedup,yminsky.merge";
      Keydb.unconditional_checkpoint ();
    )
    else perror "Database already merged"

  (* separator used in the stored "filters" metadata value *)
  let comma = Str.regexp ","

  (* Entry point: open the databases, read which filters have already
     been applied, run the missing ones, and close up. *)
  let run () =
    set_logfile "clean";
    perror "Running SKS %s%s" Common.version Common.version_suffix;
    Keydb.open_dbs settings;
    perror "Keydb opened";
    let applied_filters =
      try Str.split comma (Keydb.get_meta "filters")
      with Not_found -> []
    in
    run applied_filters;
    Keydb.close_dbs ()

end