%matplotlib inline
from IPython.html.widgets import interact
from scipy import stats
import seaborn as sns
import pandas as pd
n0=stats.norm(0,1)
n1=stats.norm(1,2)
xi = linspace(-5,5,100)
fig,ax=subplots()
ax.plot(xi,n0.pdf(xi))
ax.plot(xi,n1.pdf(xi))
def bias_coin(phead = .5):
while True:
yield int( np.random.rand() < phead )
pct_mixed = 0.1
bias_coin_gen = bias_coin(pct_mixed)
dual_set = [n0,n1]
samples = [ dual_set[bias_coin_gen.next()].rvs() for i in range(500) ]
hist(samples,bins=20)
title('average = %3.3f, median=%3.3f pct_mixed=%3.3f'%(mean(samples),np.median(samples),pct_mixed))
import sympy.stats
from sympy.abc import x
eps = sympy.symbols('epsilon')
mixed_cdf = sympy.stats.cdf(sympy.stats.Normal('x',0,1),'x')(x)*(1-eps) + eps*sympy.stats.cdf(sympy.stats.Normal('x',1,2),'x')(x)
mixed_pdf = sympy.diff(mixed_cdf,x)
def plot_mixed_dist(epsilon=.1):
n1 = stats.norm(1,2)
xi = linspace(-5,5,100)
fig,ax = subplots()
ax.plot(xi,[sympy.lambdify(x,mixed_pdf.subs(eps,epsilon))(i) for i in xi],label='mixed',lw=2)
ax.plot(xi,n0.pdf(xi),label='g(x)',linestyle='--')
ax.plot(xi,n1.pdf(xi),label='h(x)',linestyle='--')
ax.legend(loc=0)
ax.set_title('epsilon = %2.2f'%(epsilon))
ax.vlines(0,0,.4,linestyle='-',color='g')
ax.vlines(epsilon,0,.4,linestyle='-',color='b')
interact(plot_mixed_dist,epsilon=(0,1,.05))
fig,ax=subplots()
colors=['b','r']
for k in [1,2]:
ax.plot(xi,np.ma.masked_array(xi,abs(xi)>k),color=colors[k-1])
ax.plot(xi,np.ma.masked_array(np.sign(xi)*k,abs(xi) k > 0]^2')
numer=mma.eval('Integrate[lpdf*Piecewise[{{x, Abs[x] < k},{k*Sign[x],Abs[x] >= k}}]^2, {x, -Infinity, Infinity}, Assumptions -> k > 0]')
def asymp_variance(kval,eps=0.01):
mma.push('k',kval)
mma.push('Epsilon',eps)
mma.eval('numer ='+numer) # used closure string
mma.eval('denom ='+denom)
mma.eval('Clear[k,Epsilon]')
return float(mma.eval('N[numer/denom]'))
return asymp_variance
asympt_var_case1 = closure_variance((0,1),(1,2)) # case 1 with N(0,1) + N(1,2)
fig,ax=subplots()
ax.plot(kvals,[asympt_var_case1(k,0) for k in kvals],'-o',label='eps=0')
ax.plot(kvals,[asympt_var_case1(k,.05) for k in kvals],'-o',label='eps=.05')
ax.plot(kvals,[asympt_var_case1(k,.1) for k in kvals],'-o',label='eps=0.1')
ax.set_xlabel("k")
ax.set_ylabel("relative asymptotic efficiency ")
ax.legend(loc=0)
ax.set_title(r"$\mathcal{N}(0,1) , \mathcal{N}(1,2)$ mixed",fontsize=18)
ax.grid()
asympt_var_case2 = closure_variance((0,0),(1,10)) # case 1 with N(0,1) + N(0,10)
fig2,ax=subplots()
ax.plot(kvals,[asympt_var_case2(k,0) for k in kvals],'-o',label='eps=0')
ax.plot(kvals,[asympt_var_case2(k,.05) for k in kvals],'-o',label='eps=.05')
ax.plot(kvals,[asympt_var_case2(k,.1) for k in kvals],'-o',label='eps=0.1')
ax.set_xlabel("k")
ax.set_ylabel("relative asymptotic efficiency ")
ax.legend(loc=0)
ax.set_title(r"$\mathcal{N}(0,1) , \mathcal{N}(0,10)$ mixed",fontsize=18)
ax.grid()
fig.get_axes()[0].axis(ymax=3.5)
fig
nsamples = 200
xs = np.array([dual_set[bias_coin_gen.next()].rvs() for i in range(200)*nsamples ]).reshape(nsamples,-1)
fig,ax=subplots()
ax.hist(np.mean(xs,0),20,alpha=0.8,label = 'mean')
ax.hist(np.median(xs,0),20,alpha=0.3,label ='median')
ax.legend()
fig,ax=subplots()
sns.violinplot(np.vstack([np.median(xs,axis=0),np.mean(xs,axis=0)]).T,ax=ax,names=['median','mean']);
mma.push('data',xs[:,0].tolist())
mma.eval('psi[k_] := Function[x, Piecewise[{{x, Abs[x] < k}, {k*Sign[x], Abs[x] > k}}]]')
def psi_est(k,x):
if x.ndim ==2 : # loop over columns
out = []
for i in range(x.shape[1]):
data = x[:,i]
out.append( psi_est(k,data) ) # recurse
return np.array(out)
else:
mma.push('data',x.tolist())
return float(mma.eval('\[Mu] /. FindRoot[Plus @@ (psi[%d] /@ (data - \[Mu])) == 0, {\[Mu], 0}]'%(k)))
hist(psi_est(1,xs),alpha=.7,label='k=1')
hist(psi_est(2,xs),alpha=.7,label='k=2')
hist(psi_est(3,xs),alpha=.7,label='k=3')
legend(loc=0)
huber_est={k:psi_est(k,xs) for k in [1,1.5,2,3]}
huber_est[0] = np.median(xs,axis=0)
huber_est[4] = np.mean(xs,axis=0)
fig,ax=subplots()
sns.violinplot(pd.DataFrame(huber_est),ax=ax)
ax.set_xticklabels(['median','1','1.5','2','3','mean']);
from sympy import mpmath, symbols, diff, Piecewise, sign, lambdify
from sympy.stats import density, cdf, Normal
from sympy.abc import k,x
eps = symbols('epsilon')
lpdf=diff(cdf(Normal('x',0,1))(x)*(1-eps)+ eps*cdf(Normal('x',0,10))(x),x)
p = Piecewise((x,abs(x)