# Scatter/regression plot of mean sentiment polarity ('sp') against mean
# retweets ('rt') per category ('cat'), saved as a PNG.
#
# Only categories with more than `filtr` news items are kept; raise `filtr`
# to filter the categories more aggressively.
filtr = 100

# Category labels that are left out of the plot regardless of their size.
excluded_cats = {'news', 'article', 'storyline', 'bigstory'}

# Inside `groupby(...).filter`, `x.name` is the key (category) of the group.
df = tn.groupby('cat').filter(
    lambda x: len(x) > filtr and x.name not in excluded_cats
)

# Collapse to one row per category; pivot_table uses its default
# aggregation (mean) for the 'sp' and 'rt' columns.
df = pd.pivot_table(df, values=['sp', 'rt'], index=['cat'])

# x / y / data are passed by keyword: recent seaborn releases no longer
# accept them positionally.
ax = sns.regplot(x='rt', y='sp', data=df)
ax.set(
    xlabel='Retweet',
    ylabel='Sentiment Polarity',
    title='SP vs RT of Categories',
    ylim=(-1, 2),
)

# Label every point with its category name. The coordinates are looked up by
# column name so they always match the plot's x ('rt') and y ('sp') axes,
# independent of the column order produced by pivot_table.
for cat_name, point in df.iterrows():
    ax.annotate(
        cat_name,
        (point['rt'], point['sp']),
        xytext=(5, -2),
        textcoords='offset points',
    )

# NOTE(review): assumes the 'figs' directory already exists — confirm.
plt.gcf().savefig('figs/cat-rt-sp-scatter.png')