require 'statsample'
# Registers an analysis named "Statsample::Bivariate.correlation_matrix" that
# builds a DataFrame of four `rnorm` vectors, displays its head, and then
# displays the correlation matrix computed from it.
Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
  # It so happens that Daru::Vector and Daru::DataFrame must update metadata
  # like positions of missing values every time they are created.
  #
  # Since we don't have any missing values in the data that we are creating,
  # we set Daru.lazy_update = true so that missing data is not updated every
  # time and things happen much faster.
  #
  # In case you do have missing data and lazy_update has been set to *true*,
  # you _SHOULD_ call `#update` on the concerned Vector or DataFrame object
  # every time an assignment or deletion cycle is complete.
  Daru.lazy_update = true

  begin
    # Create a Daru::DataFrame containing 4 vectors a, b, c and d.
    #
    # Notice that the `clone` option has been set to *false*. This tells Daru
    # to not clone the Daru::Vectors being supplied by `rnorm`, since it would
    # be unnecessarily counterproductive to clone the vectors once they have
    # been assigned to the dataframe.
    samples = 1000
    ds = Daru::DataFrame.new({
      a: rnorm(samples),
      b: rnorm(samples),
      c: rnorm(samples),
      d: rnorm(samples)
    }, clone: false)

    puts "== DataFrame ==\n"
    IRuby.display ds.head

    # Calculate the correlation matrix of the DataFrame.
    cm = Statsample::Bivariate.correlation_matrix(ds)

    puts "\n== Correlation Matrix ==\n"
    IRuby.display cm
  ensure
    # Set lazy_update back to *false* once our job is done so that this
    # analysis does not accidentally affect code elsewhere. Doing this in
    # `ensure` guarantees the reset happens even if a step above raises.
    Daru.lazy_update = false
  end
end

# Run the analysis stored above.
Statsample::Analysis.run_batch
== DataFrame ==
Daru::DataFrame:13085860 rows: 10 cols: 4 | ||||
---|---|---|---|---|
| | a | b | c | d |
0 | 0.9011467513657144 | -2.0698264996309637 | -0.05424188567538422 | -0.21497530674467752 |
1 | 0.3477390529339665 | 1.9693799988325165 | 0.6438210000373529 | 0.2691070769303308 |
2 | 1.169337960514499 | -1.2003500655322563 | -1.8876472885285303 | -0.21250262336698017 |
3 | -2.0149242228225352 | 0.3777196093249942 | 0.233921550979305 | 0.39979121524058253 |
4 | -0.8464428241042591 | -1.4782182304742935 | 0.22458711725118236 | -0.33396263618551913 |
5 | 1.2471920549543476 | -0.887490499184695 | -1.1041991362454315 | 1.932482592873003 |
6 | -0.8529853403070782 | -0.4909368945208435 | -0.7306423975841205 | 0.8578541353085531 |
7 | -0.38470545324770017 | 0.4812878996742971 | 0.565122377348464 | -1.277608770535443 |
8 | 1.2856180977412772 | -1.288566320945822 | -1.7868746145697005 | 1.0500431060289053 |
9 | 0.5182738763941376 | 0.36345933022878907 | -0.46301077093228676 | 0.4544283438001362 |
== Correlation Matrix ==
Analysis 2016-03-24 11:58:04 +0000 = Statsample::Bivariate.correlation_matrix