require 'daru'

# Plot columns :a and :b as a line chart. The block receives two objects
# (presumably the plot and its diagram -- confirm against the plotting
# backend) which are used to set the y range, the legend and the colour.
df = Daru::DataFrame.new({ a: [1, 2, 3, 4, 5], b: [10, 14, 15, 17, 44] })
df.plot legends: [:a, :b], type: :line do |plot, diagram|
  plot.yrange [0, 100]
  plot.legend true
  diagram.color "green"
end

require 'daru'

# Calculate statistics of numeric columns
df = Daru::DataFrame.new({
  a: ['foo', 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
  b: ['one', 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
  c: ['small', 'large', 'large', 'small', 'small', 'large', 'small', 'large', 'small'],
  d: [1, 2, 2, 3, 3, 4, 5, 6, 7],
  e: [2, 4, 4, 6, 6, 8, 10, 12, 14],
  f: [10, 20, 20, 30, 30, 40, 50, 60, 70]
})
df.mean

# Calculate multiple statistical measures in one shot
df.describe

# Create a multi-indexed DataFrame
tuples = [
  [:a, :one, :bar],
  [:a, :one, :baz],
  [:a, :two, :bar],
  [:a, :two, :baz],
  [:b, :one, :bar],
  [:b, :two, :bar],
  [:b, :two, :baz],
  [:b, :one, :foo],
  [:c, :one, :bar],
  [:c, :one, :baz],
  [:c, :two, :foo],
  [:c, :two, :bar]
]

# NOTE(review): newer Daru releases appear to build a MultiIndex from tuples
# with Daru::MultiIndex.from_tuples -- confirm against the installed version.
multi_index = Daru::MultiIndex.new(tuples)

vector_arry1 = [11, 12, 13, 14, 11, 12, 13, 14, 11, 12, 13, 14]
vector_arry2 = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]

order_mi = Daru::MultiIndex.new([
  [:a, :one, :bar],
  [:a, :two, :baz],
  [:b, :two, :foo],
  [:b, :one, :foo]
])

# Four data arrays, with a multi-index on both the vectors (order:) and the
# rows (index:).
df_mi = Daru::DataFrame.new(
  [vector_arry1, vector_arry2, vector_arry1, vector_arry2],
  order: order_mi,
  index: multi_index
)

# Specify complete tuple to choose a single row
df_mi.row[:a, :one, :bar]

# Specify partial tuple to select index hierarchically
df_mi.row[:a]

# See grouped rows with the 'groups' method
df = Daru::DataFrame.new({
  a: %w[foo bar foo bar foo bar foo foo],
  b: %w[one one two three two two one three],
  c: [1, 2, 3, 1, 3, 6, 3, 8],
  d: [11, 22, 33, 44, 55, 66, 77, 88]
})

grouped = df.group_by([:a, :b])
grouped.groups

# First group by the columns :a and :b and then calculate mean of the grouped rows.
grouped.mean

# Look up one group by its [:a, :b] key values.
grouped.get_group(["foo", "one"])

require 'daru'

# Load the sales data and build pivot tables from it.
# NOTE(review): the CSV path is machine-specific; adjust it before running.
sales = Daru::DataFrame.from_csv '/home/sameer/sales-funnel.csv'
sales.pivot_table index: [:manager, :rep]

sales.pivot_table(
  index: [:manager, :rep],
  values: :price,
  vectors: [:product],
  agg: :sum
)

df = Daru::DataFrame.new({
  a: ['ff', 'fwwq', 'efe', 'a', 'efef', 'zzzz', 'efgg', 'q', 'ggf'],
  b: ['one', 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
  c: ['small', 'large', 'large', 'small', 'small', 'large', 'small', 'large', 'small'],
  d: [-1, 2, -2, 3, -3, 4, -5, 6, 7],
  e: [2, 4, 4, 6, 6, 8, 10, 12, 14]
})

# Sort on :a and then :d with a custom comparator for each vector:
# :a compares string lengths, :d compares absolute values.
# The #abs comparator is keyed on :d because :d is the numeric sort vector;
# :b holds strings and is not part of the sort order.
# `ascending` presumably supplies one flag per sort vector -- confirm.
df.sort(
  [:a, :d],
  by: {
    a: lambda { |x, y| x.length <=> y.length },
    d: lambda { |x, y| x.abs <=> y.abs }
  },
  ascending: [false, true]
)