from collections import defaultdict
import powerlaw
import pandas as pd
from scipy import stats
import numpy as np
import json
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Output directory for the figures and input directory for the precomputed
# results. Both end in "/" because file names are appended to them by plain
# string concatenation.
path = "/Users/mark/Dropbox/Math/Complex_Systems/research/wikipedia-network/paper/writeup/graphics/"
results_path = "/Users/mark/Desktop/wiki_v4/"

# Load the funnels data: a JSON object whose keys become the 'article' column
# and whose values become the 'traversal funnels' column.
with open(results_path + "feed_count.json") as f:
    feeder_dict = json.load(f)

# list(...) makes the (key, value) pairs a real list on Python 3 as well
# (items() is a view there), and passing the column names to the constructor
# keeps an empty input from failing on a column-count mismatch.
df = pd.DataFrame(list(feeder_dict.items()),
                  columns=['article', 'traversal funnels'])

# Rank 1 is the article with the most traversal funnels.
df = df.sort_values(by='traversal funnels', ascending=False)
df['rank'] = np.arange(1, df.shape[0] + 1)
# Fit a discrete power law to the traversal-funnel counts.
powerlaw_fit_funnels = powerlaw.Fit(df['traversal funnels'], discrete=True)

# Each line is printed as one pre-formatted string so the statement is valid
# (and produces the same text) on both Python 2 and Python 3. The two spaces
# after the colon reproduce the output of the former `print "label: ", value`.
print("alpha:  %s" % (powerlaw_fit_funnels.alpha,))
print("xmin:  %s" % (powerlaw_fit_funnels.xmin,))
# gamma is derived from the fitted exponent as 1/alpha + 1.
print("gamma:  %s" % (1 / powerlaw_fit_funnels.alpha + 1,))
print("Kolmogorov Distance:  %s" % (powerlaw_fit_funnels.D,))

# Notebook scratch expression: the gamma formula evaluated by hand for
# alpha = 1.73. It has no effect when the file is run as a script.
1/1.73 + 1
# Scatter of traversal-funnel count against article rank.
plt.figure(figsize=(8, 6))
plt.scatter(df["rank"], df["traversal funnels"], color="#268bd2")

# Everything below is configured on the axes the scatter was drawn on.
axis = plt.gca()
axis.tick_params(axis='both', which='major', labelsize=14)

# Symmetric-log scaling on both axes (tick labels become powers of ten).
axis.set_yscale('symlog')
axis.set_xscale('symlog')

axis.set_title("Traversal Funnels Distribution", fontsize=14)
axis.set_xlabel("Article Rank", fontsize=14)
axis.set_ylabel("Traversal Funnels", fontsize=14)

# Extend the axes: y runs from just below zero up to 10^8, x starts at zero.
axis.set_ylim(bottom=-0.5, top=10 ** 8)
axis.set_xlim(left=0)
# Empirical PDF of the funnel counts with the fitted power law overlaid.
# A fresh figure is opened first: when this file runs top to bottom as a
# script (rather than cell by cell in a notebook) the curves would otherwise
# be drawn onto the rank scatter plot created just before.
plt.figure()

# Legend values come from the fit itself so they cannot go stale when the
# data changes; gamma uses the same 1/alpha + 1 relation printed after the fit.
fit_alpha = powerlaw_fit_funnels.alpha
fit_gamma = 1 / fit_alpha + 1

# NOTE: despite its name, figCCDF holds the return value of plot_pdf (a PDF
# plot, not a CCDF); it is passed back in as `ax` so both curves share axes.
figCCDF = powerlaw_fit_funnels.plot_pdf(color='#268bd2', linewidth=2)
powerlaw_fit_funnels.power_law.plot_pdf(
    color='g', linestyle='--', ax=figCCDF,
    label=("power law fit\n"
           + (r'$\alpha$ = %.1f,' % fit_alpha)
           + (r'$\gamma$ = %.1f' % fit_gamma)))
plt.tick_params(axis='both', which='major', labelsize=14)

# Extend the axes.
plt.gca().set_xlim(left=1, right=10**8)
plt.gca().set_ylim(top=1, bottom=10**(-13))

plt.legend(numpoints=2)
plt.ylabel("PDF", fontsize=14)
plt.xlabel("Traversal Funnels", fontsize=14)
# Combined figure: rank scatter plot with the PDF and its power-law fit as an
# inset in the top-right corner, saved as funnels_distribution.png.

# Start from the seaborn defaults.
sns.set()

plt.figure(figsize=(8,6))
plt.scatter(df["rank"], df["traversal funnels"], color="#268bd2")
plt.tick_params(axis='both', which='major', labelsize=14)

# Symmetric-log scaling on both axes (tick labels become powers of ten).
axis = plt.gca()
axis.set_yscale('symlog')
axis.set_xscale('symlog')

plt.title("Traversal Funnels Distribution", fontsize=14)
plt.xlabel("Article Rank", fontsize=14)
plt.ylabel("Traversal Funnels", fontsize=14)

# Extend the axes.
plt.gca().set_ylim(bottom=-0.5, top=10**(8))
plt.gca().set_xlim(left=0)

# The style is switched before the inset axes are created.
sns.set_style("dark")

# Inset axes in the top corner, positioned as [left, bottom, width, height].
# The yellow background is set on the axes patch rather than through the
# `axisbg=` keyword, which newer matplotlib releases no longer accept.
a = plt.axes([.50, .53, .38, .34])
a.patch.set_facecolor('y')

# Legend values come from the fit itself so they cannot go stale when the
# data changes; gamma uses the same 1/alpha + 1 relation printed after the fit.
fit_alpha = powerlaw_fit_funnels.alpha
fit_gamma = 1 / fit_alpha + 1

# NOTE: despite its name, figCCDF holds the return value of plot_pdf (a PDF
# plot, not a CCDF); it is passed back in as `ax` so both curves share axes.
figCCDF = powerlaw_fit_funnels.plot_pdf(color='#268bd2', linewidth=2)
powerlaw_fit_funnels.power_law.plot_pdf(
    color='g', linestyle='--', ax=figCCDF,
    label=("power law fit\n"
           + (r'$\alpha \simeq$ %.2f,' % fit_alpha)
           + (r'$\gamma \simeq$ %.2f' % fit_gamma)))
plt.tick_params(axis='both', which='major', labelsize=14)
a.set_xlim(right=10**7)
plt.xlabel("Traversal Funnels", fontsize=14)
plt.ylabel("PDF", fontsize=14)
plt.legend(numpoints=2, fontsize=14)

# Make the inset background almost transparent so the scatter shows through.
a.patch.set_alpha(0.1)

# Reset the seaborn defaults.
sns.set()

# Save the figure.
plt.savefig(path+'funnels_distribution.png', format='png', dpi=300, bbox_inches='tight')
# Compare the power-law fit against each alternative distribution in turn and
# print the two values returned by distribution_compare (per the powerlaw
# package documentation: the log-likelihood ratio R and its significance p).
# Output order: lognormal, exponential, stretched_exponential,
# truncated_power_law. After the loop, R and p hold the last comparison.
# Each line is printed as one pre-formatted string so the statement is valid
# (and produces the same text) on both Python 2 and Python 3.
for alternative in ('lognormal', 'exponential',
                    'stretched_exponential', 'truncated_power_law'):
    R, p = powerlaw_fit_funnels.distribution_compare('power_law', alternative)
    print("R:  %s" % (R,))
    print("p:  %s" % (p,))