def in_idle():
    """
    Tell whether the current interpreter is running inside IDLE.

    Tkinter programs started from IDLE should never call ``Tk.mainloop``,
    so callers are expected to guard every ``Tk.mainloop`` call with this
    check.

    :warning: Detection is based purely on the class name of
        ``sys.stdin``.  If ``sys.stdin`` has been replaced by the user,
        the answer may be wrong.

    :rtype: bool
    """
    import sys

    # Class names of the stdin replacements this check recognises as IDLE.
    idle_stdin_classes = ('PyShell', 'RPCProxy')
    stdin_class_name = sys.stdin.__class__.__name__
    return stdin_class_name in idle_stdin_classes

##########################################################################
# READ FROM FILE OR STRING
##########################################################################

# recipe from David Mertz

def filestring(f):
    """
    Return the full text behind ``f``.

    :param f: either an object exposing a ``read`` method, or a string
        naming a file to open in text mode.
    :return: whatever ``f.read()`` returns for a file-like object, or the
        complete contents of the named file.
    :raise ValueError: if ``f`` is neither file-like nor a string.
    """
    # Anything that can be read from is treated as a file-like object.
    if hasattr(f, 'read'):
        return f.read()

    if not isinstance(f, string_types):
        raise ValueError("Must be called with a filename or file-like object")

    # Otherwise ``f`` is a path: open it, read it, and close it again.
    with open(f, 'r') as infile:
        return infile.read()

def breadth_first(tree, children=iter, maxdepth=-1):
    """
    Yield the nodes of a tree in breadth-first order.

    No cycle detection is performed.

    :param tree: the root node of the tree.
    :param children: a function taking a node and returning an iterator
        over that node's children (defaults to ``iter``).
    :param maxdepth: nodes at this depth are yielded but not expanded.
        The root is at depth 0; the default of -1 never matches a depth,
        so the traversal is unbounded.
    :rtype: iter
    """
    pending = deque([(tree, 0)])

    while pending:
        current, level = pending.popleft()
        yield current

        if level == maxdepth:
            continue

        # A TypeError while obtaining or walking the children is taken to
        # mean "this node has no (further) children"; any children queued
        # before the error stay in the queue.
        try:
            for child in children(current):
                pending.append((child, level + 1))
        except TypeError:
            pass

def guess_encoding(data):
    """
    Try to decode a byte string with a series of candidate encodings.

    The candidates are, in order: ``'utf-8'``, whatever the ``locale``
    module reports (``nl_langinfo(CODESET)``, ``getlocale()`` and
    ``getdefaultlocale()``), and finally ``'latin-1'``.  The calling
    program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    :param data: the byte string to decode.
    :return: ``(decoded_unicode, successful_encoding)`` for the first
        candidate that decodes ``data``.
    :raise UnicodeError: if no candidate encoding succeeds.
    """
    # 'utf-8' is always tried first.
    candidates = ['utf-8']

    # Next, collect anything the locale machinery can tell us.  Each probe
    # is optional: a missing function or a short result is simply skipped.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass

    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass

    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' is the last resort.
    candidates.append('latin-1')

    for candidate in candidates:
        # Some of the locale calls may have returned None.
        if not candidate:
            continue
        try:
            text = text_type(data, candidate)
        except (UnicodeError, LookupError):
            continue
        return (text, candidate)

    tried = ', '.join([repr(candidate) for candidate in candidates if candidate])
    raise UnicodeError(
        'Unable to decode input data. '
        'Tried the following encodings: %s.' % tried
    )

##########################################################################
# Utilities for directed graphs: transitive closure, and inversion
# The graph is represented as a dictionary of sets
##########################################################################

def transitive_closure(graph, reflexive=False):
    """
    Compute the transitive closure of a directed graph, optionally the
    reflexive transitive closure.

    The algorithm is a slight modification of the "Marking Algorithm" of
    Ioannidis & Ramakrishnan (1998) "Efficient Transitive Closure
    Algorithms".

    :param graph: the initial graph, represented as a dictionary of sets
    :type graph: dict(set)
    :param reflexive: if set, also make the closure reflexive
    :type reflexive: bool
    :return: a new dictionary of sets; ``graph`` itself is not modified.
        Nodes that only appear as successors are added as keys.
    :rtype: dict(set)
    """

    def seed(node):
        # Starting closure for a node: itself when reflexive, else nothing.
        return {node} if reflexive else set()

    # The graph U_i in the article: successors still to be processed.
    pending = {node: graph[node].copy() for node in graph}
    # The graph M_i in the article: successors known to be reachable.
    reached = {node: seed(node) for node in graph}

    for source in graph:
        todo = pending[source]
        found = reached[source]
        while todo:
            target = todo.pop()
            found.add(target)
            # Everything already known to be reachable from ``target`` is
            # reachable from ``source`` as well.
            found.update(reached.setdefault(target, seed(target)))
            # Whatever ``target`` still has pending must be visited too,
            # minus the nodes that have been handled already.
            todo.update(pending.get(target, seed(target)))
            todo.difference_update(found)

    return reached

def bigrams(sequence, **kwargs):
    """
    Generate the bigrams of a sequence of items, lazily. For example:

        >>> from nltk.util import bigrams
        >>> list(bigrams([1,2,3,4,5]))
        [(1, 2), (2, 3), (3, 4), (4, 5)]

    The result is an iterator; wrap the call in ``list()`` (as above) to
    obtain a list.

    :param sequence: the source data to be converted into bigrams
    :type sequence: sequence or iter
    :param kwargs: extra keyword arguments, forwarded unchanged to ``ngrams``
    :rtype: iter(tuple)
    """
    pairs = ngrams(sequence, 2, **kwargs)
    for pair in pairs:
        yield pair

def trigrams(sequence, **kwargs):
    """
    Generate the trigrams of a sequence of items, lazily. For example:

        >>> from nltk.util import trigrams
        >>> list(trigrams([1,2,3,4,5]))
        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

    The result is an iterator; wrap the call in ``list()`` (as above) to
    obtain a list.

    :param sequence: the source data to be converted into trigrams
    :type sequence: sequence or iter
    :param kwargs: extra keyword arguments, forwarded unchanged to ``ngrams``
    :rtype: iter(tuple)
    """
    triples = ngrams(sequence, 3, **kwargs)
    for triple in triples:
        yield triple

######################################################################
# Binary Search in a File
######################################################################

# inherited from pywordnet, by Oliver Steele

def binary_search_file(file, key, cache=None, cacheDepth=-1):
    """
    Return the line from the file with first word key.
    Searches through a sorted file using the binary search algorithm.

    :type file: file
    :param file: the file to be searched through.  Besides ``seek``,
        ``tell`` and ``readline`` the object must provide a
        ``discard_line()`` method, which is called to skip the remainder
        of the line the probe position landed in.
    :type key: str
    :param key: the identifier we are searching for.
    :type cache: dict or None
    :param cache: optional mapping from probe position to
        ``(offset, line)``.  Entries are only valid for one particular
        file, so pass the same dictionary only for searches of the same
        file.  When omitted, a fresh dictionary is used for this call.
    :type cacheDepth: int
    :param cacheDepth: probes made during the first ``cacheDepth``
        iterations of the search are stored in ``cache``; the default of
        -1 stores nothing.
    :return: the matching line, or None if no line starts with ``key``
        followed by a space.
    """
    # A shared mutable default would leak cached lines from one file into
    # searches of another, so each call gets its own dictionary unless the
    # caller supplies one.
    if cache is None:
        cache = {}

    key = key + ' '
    keylen = len(key)
    start = 0
    currentDepth = 0

    # ``end`` is the position of the last character in the file.
    if hasattr(file, 'name'):
        end = os.stat(file.name).st_size - 1
    else:
        file.seek(0, 2)
        end = file.tell() - 1
        file.seek(0)

    while start < end:
        lastState = start, end
        middle = (start + end) // 2

        if cache.get(middle):
            offset, line = cache[middle]

        else:
            line = ""
            while True:
                # Step back one character, then skip to the start of the
                # next full line and read it.
                file.seek(max(0, middle - 1))
                if middle > 0:
                    file.discard_line()
                offset = file.tell()
                line = file.readline()
                if line != "":
                    break
                # at EOF; try to find start of the last line
                middle = (start + middle) // 2
                if middle == end - 1:
                    return None
            if currentDepth < cacheDepth:
                cache[middle] = (offset, line)

        if offset > end:
            assert end != middle - 1, "infinite loop"
            end = middle - 1
        elif line[:keylen] == key:
            return line
        elif line > key:
            assert end != middle - 1, "infinite loop"
            end = middle - 1
        elif line < key:
            start = offset + len(line) - 1

        currentDepth += 1
        thisState = start, end

        if lastState == thisState:
            # Detects the condition where we're searching past the end
            # of the file, which is otherwise difficult to detect
            return None

    return None

def set_proxy(proxy, user=None, password=''):
    """
    Install a global URL opener that routes HTTP and HTTPS requests
    through a proxy.

    If ``proxy`` is None, the proxy is looked up from the environment or
    system settings instead.

    :param proxy: The HTTP proxy server to use. For example:
        'http://proxy.example.com:3128/'
    :param user: The username to authenticate with. Use None to disable
        authentication.
    :param password: The password to authenticate with.
    :raise ValueError: if ``proxy`` is None and no 'http' proxy setting
        can be detected.
    """
    # Imported here as in the original code; not referenced below.
    from nltk import compat

    if proxy is None:
        # Fall back to whatever the system reports for plain HTTP.
        try:
            proxy = getproxies()['http']
        except KeyError:
            raise ValueError('Could not detect default proxy settings')

    # The same proxy serves both schemes.
    routes = {'https': proxy, 'http': proxy}
    opener = build_opener(ProxyHandler(routes))

    if user is not None:
        # Register the credentials once and offer them through both the
        # basic and the digest proxy-authentication handlers.
        credentials = HTTPPasswordMgrWithDefaultRealm()
        credentials.add_password(
            realm=None, uri=proxy, user=user, passwd=password
        )
        for handler_class in (ProxyBasicAuthHandler, ProxyDigestAuthHandler):
            opener.add_handler(handler_class(credentials))

    # Override the existing url opener.
    install_opener(opener)

def elementtree_indent(elem, level=0):
    """
    Recursive function to indent an ElementTree._ElementInterface
    used for pretty printing. Run indent on elem and then output
    in the normal way.

    Only ``text`` and ``tail`` values that are empty or whitespace-only
    are overwritten; real content is left untouched.

    :param elem: element to be indented. will be modified.
    :type elem: ElementTree._ElementInterface
    :param level: level of indentation for this element
    :type level: nonnegative integer
    :return: None; ``elem`` is modified in place.
    """
    # NOTE(review): the indent unit is a single space per level as written
    # here -- confirm this is the intended width.
    indent = "\n" + level * " "

    # Every non-root element gets a tail at its own level, so that what
    # follows it starts on a fresh line.  Previously only childless
    # elements got this, so a sibling following a nested element was
    # written directly after that element's closing tag.
    if level and (not elem.tail or not elem.tail.strip()):
        elem.tail = indent

    if len(elem):
        # First child starts on a new line, one level deeper.
        if not elem.text or not elem.text.strip():
            elem.text = indent + " "

        last_child = None
        for child in elem:
            elementtree_indent(child, level + 1)
            last_child = child

        # The last child is followed by this element's closing tag, which
        # belongs at this element's own level.
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = indent

def choose(n, k):
    """
    Compute the binomial coefficient, commonly written nCk: the number of
    combinations of n things taken k at a time.
    (https://en.wikipedia.org/wiki/Binomial_coefficient).

    This is the *scipy.special.comb()* with long integer computation but
    this approximation is faster, see
    https://github.com/nltk/nltk/issues/1181

        >>> choose(4, 2)
        6
        >>> choose(6, 2)
        15

    :param n: The number of things.
    :type n: int
    :param k: The number of things taken at a time.
    :type k: int
    :return: nCk, or 0 when ``k`` is negative or greater than ``n``.
    :rtype: int
    """
    if not 0 <= k <= n:
        return 0

    numerator = 1
    denominator = 1
    # nCk == nC(n-k), so only the smaller of the two needs multiplying out.
    for step in range(1, min(k, n - k) + 1):
        numerator *= n - step + 1
        denominator *= step
    return numerator // denominator