"""Maintainer: Marko Toplak"""from__future__importabsolute_import,with_statementif__name__=="__main__":__package__="Orange.bio"importcPickleaspickle,os,tempfile,sysfromcollectionsimportdefaultdictimportOrange.coreasorangefromOrange.orngimportorngServerFilesfrom.importobiGO,obiKEGG,obiTaxonomysfdomain="gene_sets"defnth(l,n):return[a[n]forainl]fromOrange.bio.genesetimportGeneSet,GeneSets,GenesetRegExceptiondefgoGeneSets(org):"""Returns gene sets from GO."""ontology=obiGO.Ontology()annotations=obiGO.Annotations(org,ontology=ontology)genesets=[]link_fmt="http://amigo.geneontology.org/cgi-bin/amigo/term-details.cgi?term=%s"fortermn,terminontology.terms.items():genes=annotations.GetAllGenes(termn)hier=("GO",term.namespace)iflen(genes)>0:gs=GeneSet(id=termn,name=term.name,genes=genes,hierarchy=hier,organism=org,link=link_fmt%termn)genesets.append(gs)returnGeneSets(genesets)defkeggGeneSets(org):""" Returns gene sets from KEGG pathways. """kegg=obiKEGG.KEGGOrganism(org)genesets=[]foridinkegg.pathways():printidpway=obiKEGG.KEGGPathway(id)hier=("KEGG","pathways")gs=GeneSet(id=id,name=pway.title,genes=kegg.get_genes_by_pathway(id),hierarchy=hier,organism=org,link=pway.link)genesets.append(gs)returnGeneSets(genesets)defdictyMutantSets():""" Return dicty mutant phenotype gene sets from Dictybase """from.importobiDictyMutantslink_fmt="http://dictybase.org/db/cgi-bin/dictyBase/SC/scsearch.pl?searchdb=strains&search_term=%s&column=all&B1=Submit"#genesets = [GeneSet(id=mutant.name, name=mutant.descriptor, genes=obiDictyMutants.mutant_genes(mutant), hierarchy=("Dictybase", "Mutants"), organism="352472", # 352472 gathered from obiGO.py code_map -> Dicty identifier# link=(link_fmt % mutant.name if mutant.name else None)) \# for mutant in obiDictyMutants.mutants()]genesets=[GeneSet(id=phenotype,name=phenotype,genes=[obiDictyMutants.mutant_genes(mutant)[0]formutantinmutants],hierarchy=("Dictybase","Phenotypes"),organism="352472",# 352472 gathered from obiGO.py code_map -> Dicty identifierlink="") \
forphenotype,mutantsinobiDictyMutants.phenotype_mutants().items()]returnGeneSets(genesets)defcytobandGeneSets():""" Create cytoband gene sets from Stanford Microarray Database """importurllib2url="http://www-stat.stanford.edu/~tibs/GSA/cytobands-stanford.gmt"stream=urllib2.urlopen(url)data=stream.read().splitlines()genesets=[]forbandindata:b=band.split("\t")genesets.append(GeneSet(id=b[0],name=b[1],genes=b[2:]ifb[2:]else[],hierarchy=("Cytobands",),organism="9606",link=""))returnGeneSets(genesets)defomimGeneSets():""" Return gene sets from OMIM (Online Mendelian Inheritance in Man) diseses """from.importobiOMIMgenesets=[GeneSet(id=disease.id,name=disease.name,genes=obiOMIM.disease_genes(disease),hierarchy=("OMIM",),organism="9606",link=("http://www.omim.org/entry/%s"%disease.idifdisease.idelseNone)) \
fordiseaseinobiOMIM.diseases()]returnGeneSets(genesets)defmiRNAGeneSets(org):""" Return gene sets from miRNA targets """from.importobimiRNAorg_code=obiKEGG.from_taxid(org)link_fmt="http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc=%s"mirnas=[(id,obimiRNA.get_info(id))foridinobimiRNA.ids(org_code)]genesets=[GeneSet(id=mirna.matACC,name=mirna.matID,genes=mirna.targets.split(","),hierarchy=("miRNA","Targets"),organism=org,link=link_fmt%mirna.matID)forid,mirnainmirnas]returnGeneSets(genesets)defgo_miRNASets(org,ontology=None,enrichment=True,pval=0.05,treshold=0.04):from.importobimiRNA,obiGOmirnas=obimiRNA.ids(int(org))ifontologyisNone:ontology=obiGO.Ontology()annotations=obiGO.Annotations(org,ontology=ontology)go_sets=obimiRNA.get_GO(mirnas,annotations,enrichment=enrichment,pval=pval,goSwitch=False)printgo_setsgo_sets=obimiRNA.filter_GO(go_sets,annotations,treshold=treshold)link_fmt="http://amigo.geneontology.org/cgi-bin/amigo/term-details.cgi?term=%s"gsets=[GeneSet(id=key,name=ontology[key].name,genes=value,hierarchy=("miRNA","go_sets",),organism=org,link=link_fmt%key)forkey,valueingo_sets.items()]gset=GeneSets(gsets)returngsetdefloadGMT(contents,name):""" Eech line consists of tab separated elements. First is the geneset name, next is it's description. For now the description is skipped. """defhline(s):tabs=[tab.strip()fortabins.split("\t")]returnGeneSet(id=tabs[0],description=tabs[1],hierarchy=(name,),genes=tabs[2:])defhandleNELines(s,fn):""" Run function on nonempty lines of a string. Return a list of results for each line. """lines=(l.strip()forlins.splitlines())return[fn(l)forlinlinesifl]returnGeneSets(handleNELines(contents,hline))"""We have multiple paths for gene set data:buffer/bigfiles/gene_setsandbuffer/gene_sets_localboth have available.txt"""defomakedirs(dir):try:os.makedirs(dir)exceptOSError:passdeflocal_path():""" Returns local path for gene sets. Creates it if it does not exists yet. """fromOrange.orngimportorngEnvironpth=os.path.join(orngEnviron.directoryNames["bufferDir"],"gene_sets_local")omakedirs(pth)returnpthdefbuild_index(dir):""" Returns gene set availability index for some folder. """passdeffilename(hierarchy,organism):""" Obtain a filename for given hierarchy and organism. """return"gs_"+"_._".join(hierarchy+ \
(organismiforganism!=Noneelse"",))+".pck"deffilename_parse(fn):""" Returns a hierarchy and the organism from the filename."""fn=fn[3:-4]parts=fn.split("_._")hierarchy=tuple(parts[:-1])org=parts[-1]ifparts[-1]!=""elseNonereturnhierarchy,orgdefis_genesets_file(fn):returnfn.startswith("gs_")andfn.endswith(".pck")deflist_local():""" Returns available gene sets from the local repository: a list of (hierarchy, organism, on_local) """pth=local_path()gs_files=filter(is_genesets_file,os.listdir(pth))return[filename_parse(fn)+(True,)forfnings_files]deflist_serverfiles_from_flist(flist):gs_files=filter(is_genesets_file,flist)localfiles=set(orngServerFiles.listfiles(sfdomain))return[filename_parse(fn)+ \
((True,)iffninlocalfileselse(False,))forfnings_files]deflist_serverfiles_conn(serverfiles=None):""" Returns available gene sets from the server files repository: a list of (hierarchy, organism, on_local) """ifserverfiles==None:serverfiles=orngServerFiles.ServerFiles()flist=serverfiles.listfiles(sfdomain)returnlist_serverfiles_from_flist(flist)deflist_serverfiles():fname=orngServerFiles.localpath_download(sfdomain,"index.pck")flist=pickle.load(open(fname,'r'))returnlist_serverfiles_from_flist(flist)deflist_all():""" return a list of (hier, org, avalable_locally) If something for a specific (hier, org) is not downloaded yet, show it as not-local. """flist=list_local()+list_serverfiles()d={}forh,o,localinflist:d[h,o]=min(local,d.get((h,o),True))return[(h,o,local)for(h,o),localind.items()]defupdate_server_list(serverfiles_upload,serverfiles_list=None):ifserverfiles_list==None:serverfiles_list=orngServerFiles.ServerFiles()flist=map(lambdax:filename(*x[:2]),list_serverfiles_conn(serverfiles_list))tfname=pickle_temp(flist)try:fn="index.pck"title="Gene sets: index"tags=["gene sets","index","essential"]serverfiles_upload.upload(sfdomain,fn,tfname,title,tags)serverfiles_upload.unprotect(sfdomain,fn)exceptException,e:raiseefinally:os.remove(tfname)def_register_local(genesets):""" Registers using the common hierarchy and organism. """pth=local_path()org=genesets.common_org()hierarchy=genesets.common_hierarchy()fn=filename(hierarchy,org)withopen(os.path.join(pth,fn),"w")asf:pickle.dump(genesets,f)returnfndefpickle_temp(obj):""" Pickle a file to a temporary file and returns its name """fd,tfname=tempfile.mkstemp()os.close(fd)f=open(tfname,'wb')pickle.dump(obj,f)f.close()returntfnamedef_register_serverfiles(genesets,serverFiles):""" Registers using the common hierarchy and organism. """org=genesets.common_org()hierarchy=genesets.common_hierarchy()fn=filename(hierarchy,org)#save to temporary filetfname=pickle_temp(genesets)try:taxname=obiTaxonomy.name(org)title="Gene sets: "+", ".join(hierarchy)+ \
((" ("+taxname+")")iforg!=Noneelse"")tags=list(hierarchy)+["gene sets",taxname]+ \
(["essential"]iforginobiTaxonomy.essential_taxids()else[])serverFiles.upload(sfdomain,fn,tfname,title,tags)serverFiles.unprotect(sfdomain,fn)finally:os.remove(tfname)update_server_list(serverFiles)defregister(genesets,serverFiles=None):""" Hierarchy is induced from the gene set names. """ifserverFiles==None:_register_local(genesets)else:_register_serverfiles(genesets,serverFiles)defbuild_hierarchy_dict(files):hierd=defaultdict(list)forind,finenumerate(files):hier,org=fforiinrange(len(hier)+1):hierd[(hier[:i],org)].append(ind)returnhierddefload_local(hierarchy,organism):files=map(lambdax:x[:2],list_local())hierd=build_hierarchy_dict(files)out=GeneSets()for(h,o)in[files[i]foriinhierd[(hierarchy,organism)]]:fname=os.path.join(local_path(),filename(h,o))out.update(pickle.load(open(fname,'r')))returnoutdefload_serverfiles(hierarchy,organism):files=map(lambdax:x[:2],list_serverfiles())hierd=build_hierarchy_dict(files)out=GeneSets()for(h,o)in[files[i]foriinhierd[(hierarchy,organism)]]:fname=orngServerFiles.localpath_download(sfdomain,filename(h,o))out.update(pickle.load(open(fname,'r')))returnoutdefload(hierarchy,organism):""" First try to load from the local registred folder. If the file is not available, load it from the server files. """ret=load_local(hierarchy,organism)iflen(ret)==0:ret.update(load_serverfiles(hierarchy,organism))returnretdefcollections(*args):""" Input is a list of collections. Collection can either be a tuple (hierarchy, orgranism), where hierarchy is a tuple also. """result=GeneSets()forcollectioninargs:try:result.update(collection)except(ValueError,TypeError):ifissequencens(collection):#have a hierarchy, organism specificationnew=load(*collection)result.update(new)else:ifcollection.lower()[-4:]==".gmt":#format from webpageresult.update(loadGMT(open(collection,"rt").read(),collection))else:raiseException("collection() accepts files in .gmt format only.")returnresultdefissequencens(x):"Is x a sequence and not string ? We say it is if it has a __getitem__ method and it is not an instance of basestring."returnhasattr(x,'__getitem__')andnotisinstance(x,basestring)classTException(Exception):passdefupload_genesets(rsf):""" Builds the default gene sets and """orngServerFiles.update_local_files()genesetsfn=[keggGeneSets,goGeneSets,miRNAGeneSets]organisms=obiTaxonomy.common_taxids()forfningenesetsfn:fororginorganisms:try:print"Uploading ORG",org,fngenesets=fn(org).split_by_hierarchy()forgsingenesets:print"registering",gs.common_hierarchy()register(gs,rsf)#server files#register(gs)print"successful",gs.common_hierarchy()except(obiKEGG.OrganismNotFoundError,GenesetRegException):print"organism not found",orgif__name__=="__main__":printcytobandGeneSets()exit()rsf=orngServerFiles.ServerFiles(username=sys.argv[1],password=sys.argv[2])upload_genesets(rsf)pass