"""Author: Christian BouwenseProgram that gets the revision data for a user and measures burstiness."""importtimeimportrandomimportdatetimeasdtimportmwapiimportoperatorimportnumpyasnpimportdateutil.parserasdupfrommatplotlibimportpyplotaspltimportmatplotlib.patchesasmpatches

def get_user_revisions(user, uc_prop='timestamp', uc_start='today',
                       uc_end='2000-01-01T00:00:00Z'):
    """Fetch a user's contribution timestamps and record their interevent times.

    Queries the ``usercontribs`` list on English Wikipedia for ``user``,
    following ``uccontinue`` tokens until the response no longer contains a
    ``continue`` section. The gaps between consecutive revisions are stored
    in the module-level ``user_data`` dictionary under
    ``user_data[user]['interevent_times']``, after which ``get_B(user)`` and
    ``get_M(user)`` are called. ``user_data`` and ``get_B`` are expected to
    be defined elsewhere in this module.

    Args:
        user: Name of the Wikipedia user to query.
        uc_prop: Accepted for interface compatibility; it is currently not
            forwarded to the API request.
        uc_start: Timestamp at which the listing starts, or the string
            ``"today"`` to use the current UTC time.
        uc_end: Timestamp at which the listing ends.

    Returns:
        None. Results are written into ``user_data`` and progress messages
        are printed.
    """
    # The caller can pass the string "today" instead of a timestamp. The
    # trailing "Z" denotes UTC, so the value is built from gmtime rather than
    # local time. Any other value is used as given.
    if uc_start == "today":
        uc_start = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    # Connect to Wikipedia
    session = mwapi.Session('https://en.wikipedia.org', user_agent='cbouwense')

    # These parameters are identical for the first and every follow-up query.
    query_params = {
        'action': 'query',
        'list': 'usercontribs',
        'ucuser': user,
        'uclimit': 'max',
        'ucstart': uc_start,
        'ucend': uc_end,
    }

    rev_dict = session.get(**query_params)
    rev_timestamps = [dup.parse(rev['timestamp'])
                      for rev in rev_dict['query']['usercontribs']]

    # A "continue" section means the query did not return all the data
    # because of the per-request limit; keep asking until it disappears.
    print("Retrieving data on %s from Wikipedia..." % user)
    while 'continue' in rev_dict:
        rev_dict = session.get(
            uccontinue=rev_dict['continue']['uccontinue'], **query_params)
        rev_timestamps.extend(dup.parse(rev['timestamp'])
                              for rev in rev_dict['query']['usercontribs'])

    # Seconds between each revision and the one listed after it.
    # NOTE(review): presumably the API lists newest revisions first, making
    # these differences non-negative -- confirm against the API defaults.
    interevent_times = [
        (current - following).total_seconds()
        for current, following in zip(rev_timestamps, rev_timestamps[1:])
    ]

    # Create the entry for this user in the global dictionary, then derive
    # the statistics that depend on it.
    user_data[user] = {'interevent_times': interevent_times}
    get_B(user)
    get_M(user)

    print("Data received successfully!")

def get_M(user):
    """Compute the memory coefficient M for a user's interevent times.

    Reads ``user_data[user]['interevent_times']`` and stores the result in
    ``user_data[user]['M']``. M is the mean, over all consecutive pairs
    ``(tau_i, tau_{i+1})``, of

        (tau_i - mean_1) * (tau_{i+1} - mean_2) / (std_dev_1 * std_dev_2)

    where ``mean_1``/``std_dev_1`` describe all times except the last and
    ``mean_2``/``std_dev_2`` describe all times except the first.

    The coefficient is undefined when there are fewer than two interevent
    times (no consecutive pair exists) or when either set of times has zero
    standard deviation; NaN is stored in those cases instead of raising
    ``ZeroDivisionError`` or dividing by zero.

    Args:
        user: Key into the module-level ``user_data`` dictionary.

    Returns:
        None. The result is written to ``user_data[user]['M']``.
    """
    times = np.asarray(user_data[user]['interevent_times'], dtype=float)

    # At least two interevent times are needed to form one consecutive pair.
    if times.size < 2:
        user_data[user]['M'] = np.nan
        return

    leading = times[:-1]    # tau_i
    trailing = times[1:]    # tau_{i+1}

    spread = np.std(leading) * np.std(trailing)
    if spread == 0:
        # Constant interevent times: the normalisation would be 0/0.
        user_data[user]['M'] = np.nan
        return

    centered_products = ((leading - np.mean(leading))
                         * (trailing - np.mean(trailing)))

    # Mean over the n-1 pairs equals (1 / (n - 1)) * sum of the terms.
    user_data[user]['M'] = np.mean(centered_products) / spread