import scipy.stats as stats

figsize( 12.5, 9 )

norm_pdf = stats.norm.pdf

subplot(311)
x = np.linspace(0, 60000, 200)
sp1 = plt.fill_between(x , 0, norm_pdf( x, 35000, 7500), 
                color = "#348ABD", lw = 3, alpha = 0.6,
                label = "historical total prices")
p1 = Rectangle((0, 0), 1, 1, fc=sp1.get_facecolor()[0])
legend([p1], [sp1.get_label()])

subplot(312)
x = np.linspace(0, 10000, 200)
sp2 = plt.fill_between(x , 0, norm_pdf( x, 3000, 500), 
                color = "#A60628", lw = 3, alpha = 0.6,
                 label="snowblower price guess")

p2 = Rectangle((0, 0), 1, 1, fc=sp2.get_facecolor()[0])
legend([p2], [sp2.get_label()])

subplot(313)
x = np.linspace(0, 25000, 200)
sp3 = plt.fill_between(x , 0, norm_pdf( x, 12000, 3000), 
                color = "#7A68A6", lw = 3, alpha = 0.6,
                 label = "Trip price guess")
plt.autoscale( tight=True)
p3 = Rectangle((0, 0), 1, 1, fc=sp3.get_facecolor()[0])
legend([p3], [sp3.get_label()])

%pylab inline

import pymc as mc

data_mu = [ 3e3, 12e3]

data_std =  [ 5e2, 3e3 ] 

mu_prior = 35e3
std_prior =  75e2

true_price = mc.Normal( "true_price", mu_prior, 1.0/std_prior**2 )


prize_1 = mc.Normal( "first_prize", data_mu[0], 1.0/data_std[0]**2 )
prize_2 = mc.Normal( "second_prize", data_mu[1], 1.0/data_std[1]**2 )
price_estimate = prize_1 + prize_2

@mc.potential
def error( true_price = true_price, price_estimate = price_estimate ):
        return mc.normal_like( true_price,  price_estimate, 1/(3e3)**2)


model = mc.Model([true_price, prize_1, prize_2, price_estimate, error ])
mcmc = mc.MCMC(model)
mcmc.sample( 50000, 10000) 

price_trace = mcmc.trace( "true_price"  )[:]

figsize( 12.5, 4)

import scipy.stats as stats

x = np.linspace( 5000, 40000 )
plt.plot( x, stats.norm.pdf(x, 35000, 7500), c = "k", lw = 2, 
         label = "prior dist. of suite price")

_hist = plt.hist( price_trace, bins = 35, normed= True, histtype= "stepfilled")
plt.title( "Posterior of the true price estimate" )
plt.vlines( mu_prior, 0, 1.1*np.max(_hist[0] ), label = "prior's mean",
             linestyles="--" )
plt.vlines( price_trace.mean(), 0, 1.1*np.max(_hist[0] ), \
    label = "posterior's mean", linestyles="-.")
plt.legend(loc = "upper left")

figsize( 12.5, 7 )
#numpy friendly showdown_loss
def showdown_loss( guess, true_price, risk = 80000):
        loss = np.zeros_like( true_price )
        ix = true_price < guess
        loss[~ix] = np.abs( guess - true_price[~ix] )
        close_mask = [ abs( true_price - guess) <= 250 ]
        loss[close_mask] = -2*true_price[close_mask]
        loss[ix] = risk
        return loss


guesses = np.linspace( 5000, 50000, 70) 
risks = np.linspace( 30000, 150000, 6 )
expected_loss = lambda guess, risk: \
            showdown_loss( guess, price_trace, risk ).mean()
        
for _p in risks:
    results = [ expected_loss( _g, _p ) for _g in guesses ]
    plt.plot( guesses, results, label = "%d"%_p )
    
plt.title("Expected loss of different guesses, \nvarious risk-levels of \
overestimating")
plt.legend( loc="upper left", title="Risk parameter")
plt.xlabel("price bid")
plt.ylabel("expected loss")
plt.xlim( 5000, 30000 );


import scipy.optimize as sop

ax = subplot(111)


for _p in risks:
    _color = ax._get_lines.color_cycle.next()
    _min_results = sop.fmin( expected_loss, 15000, args=(_p,),disp = False)
    _results = [ expected_loss( _g, _p ) for _g in guesses ]
    plt.plot( guesses, _results , color = _color )
    plt.scatter( _min_results, 0, s = 60, \
        color= _color, label = "%d"%_p)
    plt.vlines( _min_results, 0, 120000, color = _color, linestyles="--" )
    print "minimum at risk %d: %.2f"%( _p, _min_results )
                                    
plt.title("Expected loss & Bayes actions of different guesses, \n \
various risk-levels of overestimating")
plt.legend( loc="upper left", scatterpoints = 1, title = "Bayes action at risk:")
plt.xlabel("price guess")
plt.ylabel("expected loss")
plt.xlim( 7000, 30000 )
plt.ylim( -1000, 80000)

figsize(12.5, 4)
def stock_loss( true_return, yhat, alpha = 100. ):
    if true_return*yhat < 0:
        #opposite signs, not good
        return alpha*yhat**2 - sign( true_return )*yhat \
                        + abs( true_return ) 
    else:
        return abs( true_return - yhat )
    
    
true_value = .05
pred = np.linspace( -.04, .12, 75 )

plt.plot( pred, [stock_loss( true_value, _p) for _p in pred], \
   label = "Loss associated with\n prediction if true value = 0.05", lw =3 ) 
plt.vlines( 0, 0, .25, linestyles="--" )

plt.xlabel( "prediction")
plt.ylabel( "loss" )
plt.xlim( -0.04, .12 )
plt.ylim( 0, 0.25)

true_value = -.02
plt.plot( pred, [stock_loss( true_value, _p) for _p in pred], alpha = 0.6, \
   label = "Loss associated with\n prediction if true value = -0.02", lw =3) 
plt.legend()
plt.title("Stock returns loss if true value = 0.05, -0.02" )

## Code to create artificial data
N = 100
X = 0.025*np.random.randn(N )
Y = 0.5*X + 0.01*np.random.randn( N) 

ls_coef_ = np.cov( X, Y )[0,1]/np.var(X)
ls_intercept = Y.mean() - ls_coef_*X.mean()

plt.scatter( X, Y, c="k")
plt.xlabel("trading signal")
plt.ylabel("returns")
plt.title( "Empirical returns vs trading signal" )
plt.plot( X, ls_coef_*X + ls_intercept, label = "Least-squares line")
plt.xlim( X.min(), X.max())
plt.ylim( Y.min(), Y.max() )
plt.legend( loc="upper left" )

import pymc as mc
from pymc.Matplot import plot as mcplot

std = mc.Uniform( "std", 0, 100, trace = False ) #this needs to be explained.

@mc.deterministic
def prec( U = std ):
    return 1.0/( U )**2

beta = mc.Normal( "beta", 0, 0.0001 )
alpha = mc.Normal( "alpha", 0, 0.0001 )

@mc.deterministic
def mean( X = X, alpha = alpha, beta = beta ):
    return alpha + beta*X
    
obs = mc.Normal( "obs", mean, prec, value = Y, observed = True)
model = mc.Model( {"obs":obs, "beta_0":beta, "alpha_0":alpha, "prec":prec} )
mcmc = mc.MCMC( model )

mcmc.sample( 100000, 80000)
mcplot( mcmc )


figsize( 12.5, 6 )
from scipy.optimize import fmin


def stock_loss( price, pred, coef = 500):
    """vectorized for numpy"""
    sol = np.zeros_like(price)
    ix = price*pred < 0 
    sol[ix] = coef*pred**2 - sign(price[ix])*pred + abs(price[ix])
    sol[ ~ix ] = abs( price[~ix] - pred )
    return sol


tau_samples = mcmc.trace( "prec" )[:]
alpha_samples = mcmc.trace( "alpha" )[:]
beta_samples = mcmc.trace( "beta" )[:]

N = tau_samples.shape[0]

noise = 1./np.sqrt(tau_samples)*np.random.randn(N) 

possible_outcomes = lambda signal: alpha_samples + beta_samples*signal \
        + noise

    
opt_predictions = np.zeros(50)
trading_signals =  np.linspace( X.min(), X.max(), 50 )
for i, _signal in enumerate( trading_signals ):
       _possible_outcomes = possible_outcomes( _signal )
       tomin = lambda pred: stock_loss( _possible_outcomes, pred).mean()
       opt_predictions[i] = fmin( tomin, 0, disp = False )
        
        
plt.xlabel("trading signal")
plt.ylabel("prediction")
plt.title( "Least-squares prediction vs. Bayes action prediction" )
plt.plot( X, ls_coef_*X + ls_intercept, label ="Least-squares prediction")
plt.xlim( X.min(), X.max())
plot( trading_signals, opt_predictions, label ="Bayes action prediction")
plt.legend( loc="upper left" )

from draw_sky2 import draw_sky

n_sky = 3 #choose a file/sky to examine.
data = np.genfromtxt( "data/Train_Skies/Train_Skies/\
Training_Sky%d.csv"%(n_sky),
                      dtype = None,
                      skip_header = 1,
                      delimiter = ",",
                      usecols = [1,2,3,4])
print "Data on galaxies in sky %d."%n_sky
print "position_x, position_y, e_1, e_2 "
print data[:3]

fig = draw_sky( data )
plt.title("Galaxy positions and ellipcities of sky %d."%n_sky)
plt.xlabel( "x-position")
plt.ylabel( "y-position" );


def euclidean_distance( x, y):
    return np.sqrt( ( ( x - y )**2).sum(axis=1) ) 

def f_distance( gxy_pos, halo_pos, c):
    # foo_position should be a 2-d numpy array
    return np.maximum(euclidean_distance( gxy_pos, halo_pos), c)[:,None]

def tangential_distance(glxy_position, halo_position):
    # foo_position should be a 2-d numpy array
    delta = glxy_position - halo_position
    t = (2*np.arctan( delta[:,1]/delta[:,0] ))[:,None]
    return np.concatenate( [-np.cos( t), -np.sin(t) ], axis=1)

import pymc as mc

#set the size of the halo's mass
mass_large = mc.Uniform( "mass_large", 40, 180, trace = False)

#set the initial prior position of the halos, it's a 2-d Uniform dist.
halo_position = mc.Uniform( "halo_position", 0, 4200, size = (1,2) )


@mc.deterministic
def mean(mass=mass_large, h_pos = halo_position, glx_pos=data[:,:2]):
    return mass/f_distance( glx_pos, h_pos, 240)*\
            tangential_distance( glx_pos, h_pos )
           

ellpty = mc.Normal( "ellipcity", mean, 1./0.05, observed = True,
                   value = data[:,2:] )
model = mc.Model( [ellpty, mean, halo_position, mass_large] )
mcmc = mc.MCMC( model )
map_ = mc.MAP( model )
map_.fit()
mcmc.sample(200000, 140000, 3)


t = mcmc.trace("halo_position")[:].reshape( 20000,2)


fig = draw_sky( data )
plt.title("Galaxy positions and ellipcities of sky %d."%n_sky)
plt.xlabel( "x-position")
plt.ylabel( "y-position" )
scatter( t[:,0], t[:,1], alpha = 0.015, c = "r")
plt.xlim( 0, 4200 )
plt.ylim(0, 4200 );

halo_data = np.genfromtxt( "data/Training_halos.csv", 
                      delimiter = ",",
                      usecols = [1, 2,3, 4,5,6,7,8,9],
                      skip_header = 1)
print halo_data[ n_sky]

fig = draw_sky( data )
plt.title("Galaxy positions and ellipcities of sky %d."%n_sky)
plt.xlabel( "x-position")
plt.ylabel( "y-position" )
plt.scatter( t[:,0], t[:,1], alpha = 0.015, c = "r")
plt.scatter( halo_data[n_sky-1][3], halo_data[n_sky-1][4], 
            label = "True halo position",
            c = "k", s = 70)
plt.legend(scatterpoints = 1, loc = "lower left")
plt.xlim( 0, 4200 )
plt.ylim(0, 4200 );

print "True halo location:", halo_data[n_sky][3], halo_data[n_sky][4]

mean_posterior = t.mean(axis=0).reshape(1,2)
print mean_posterior

from DarkWorldsMetric import main_score

_halo_data = halo_data[n_sky-1]

nhalo_all =  _halo_data[0].reshape(1,1)
x_true_all = _halo_data[3].reshape(1,1)
y_true_all = _halo_data[4].reshape(1,1)
x_ref_all = _halo_data[1].reshape(1,1)
y_ref_all = _halo_data[2].reshape(1,1)
sky_prediction = mean_posterior


print "Using the mean:"
main_score( nhalo_all, x_true_all, y_true_all, \
            x_ref_all, y_ref_all, sky_prediction)

#what's a bad score?
print
random_guess = np.random.randint( 0, 4200, size=(1,2) )
print "Using a random location:", random_guess
main_score( nhalo_all, x_true_all, y_true_all, \
            x_ref_all, y_ref_all, random_guess )
print

from pymc.Matplot import plot as mcplot

def halo_posteriors( n_halos_in_sky, galaxy_data,
                     samples = 5e5, burn_in = 34e4, thin = 4):

    #set the size of the halo's mass
    """
    exp_mass_large = mc.Uniform( "exp_mass_large", 40, 180)
    @mc.deterministic
    def mass_large( exp_mass_large = exp_mass_large ):
        return np.log( exp_mass_large )
    """
    
    mass_large = mc.Uniform( "mass_large", 40, 180 )
    
    mass_small_1 = 20
    mass_small_2 = 20
    
    masses = np.array( [ mass_large,mass_small_1, mass_small_2], dtype=object)
    
    #set the initial prior positions of the halos, it's a 2-d Uniform dist.
    halo_positions = mc.Uniform( "halo_positions", 0, 4200, 
                    size = (n_halos_in_sky,2)) #notice this size
    
    fdist_constants = np.array( [240, 70, 70] )
    
    @mc.deterministic
    def mean(mass=masses, h_pos = halo_positions, glx_pos=data[:,:2], 
            n_halos_in_sky = n_halos_in_sky):
        
        _sum = 0
        for i in range( n_halos_in_sky ):
            _sum += mass[i]/f_distance( glx_pos,h_pos[i, :], fdist_constants[i])*\
                tangential_distance( glx_pos, h_pos[i, :] )
            
        
        return _sum
               

    ellpty = mc.Normal( "ellipcity", mean, 1./0.05, observed = True,
                       value = data[:,2:] )
    
    model = mc.Model( [ellpty, mean, halo_positions, mass_large] )
    map_ = mc.MAP( model)
    map_.fit(method="fmin_powell)
    
    mcmc = mc.MCMC( model)
    mcmc.sample(samples, burn_in, thin)
    return mcmc.trace( "halo_positions" )[:]

n_sky =215
data = np.genfromtxt( "data/Train_Skies/Train_Skies/\
Training_Sky%d.csv"%(n_sky),
                      dtype = None,
                      skip_header = 1,
                      delimiter = ",",
                      usecols = [1,2,3,4])

#there are 3 halos in this file. 
samples = 10.5e5
traces = halo_posteriors( 3, data, samples = samples, 
                                    burn_in = 9.5e5, 
                                    thin= 10  )


fig = draw_sky( data )
plt.title("Galaxy positions and ellipcities of sky %d."%n_sky)
plt.xlabel( "x-position")
plt.ylabel( "y-position" )

colors = ["#467821", "#A60628", "#7A68A6"]

for i in range( traces.shape[1] ):
    plt.scatter( traces[:, i, 0],  traces[:, i, 1], c = colors[i], alpha = 0.02 )
    
    
for i in range(  traces.shape[1] ):
    plt.scatter( halo_data[n_sky-1][3 + 2*i], halo_data[n_sky-1][4 + 2*i], 
            label = "True halo position",
            c = "k", s = 90)
    
#plt.legend(scatterpoints = 1)
plt.xlim( 0, 4200 )
plt.ylim( 0, 4200 );


_halo_data = halo_data[n_sky-1]
print traces.shape

mean_posterior = traces.mean(axis=0).reshape( 1,4 )
print mean_posterior


nhalo_all =  _halo_data[0].reshape(1,1)
x_true_all = _halo_data[3].reshape(1,1)
y_true_all = _halo_data[4].reshape(1,1)
x_ref_all = _halo_data[1].reshape(1,1)
y_ref_all = _halo_data[2].reshape(1,1)
sky_prediction = mean_posterior


print "Using the mean:"
main_score( [1], x_true_all, y_true_all, \
            x_ref_all, y_ref_all, sky_prediction)

#what's a bad score?
print
random_guess = np.random.randint( 0, 4200, size=(1,2) )
print "Using a random location:", random_guess
main_score( [1], x_true_all, y_true_all, \
            x_ref_all, y_ref_all, random_guess )
print

from IPython.core.display import HTML
def css_styling():
    styles = open("../styles/custom.css", "r").read()
    return HTML(styles)
css_styling()