Ruby – 如何通过条件最大值的多个键检索数组中的和

原始数组是

[ {"id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1"}, {"id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1"}, {"id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2"} ] 

预期的数组应该:

 [ {"id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00","order"=>"001", "order1"=>"1"}, {"id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00","order"=>"002", "order1"=>"2"} ] 

调用形式类似于 f_addition(arr, group_fields, sum_fields, max_fields),例如:f_addition(arr, ["order","order1"], ["money","money1"], ["id", "idx"])

P.S.:调用方法后不应更改原始数组(它还会配合新的 group_fields、sum_fields、max_fields 再次作为参数使用)。

主题Hash of Arrays中的类似问题,group by和sum by with many columns

一种做法是使用 Hash#update(即 merge!)的带块形式:当被合并的两个哈希存在相同的键时,由该块来决定该键对应的值。

 # Collapses the hashes in +arr+ that share the same values for +group_fields+
 # into a single hash per group.
 #
 # Within a group, keys listed in +sum_fields+ are added as floats and
 # rendered with two decimals, keys listed in +max_fields+ keep the larger
 # value, and any other key present in both hashes keeps the value seen first.
 # Keys that appear only in a later hash of the group are appended.
 #
 # The input hashes are never modified; a group with a single member is
 # returned as that very hash object.
 #
 # @param arr [Array<Hash>] rows to aggregate
 # @param group_fields [Array] keys whose values identify a group
 # @param sum_fields [Array] keys to be summed
 # @param max_fields [Array] keys for which the maximum is kept
 # @return [Array<Hash>] one hash per group, in order of first appearance
 def f_addition(arr, group_fields, sum_fields, max_fields)
   grouped = {}

   arr.each do |row|
     group_key = row.values_at(*group_fields)

     unless grouped.key?(group_key)
       grouped[group_key] = row
       next
     end

     # Hash#merge only yields for keys present on both sides.
     grouped[group_key] = grouped[group_key].merge(row) do |field, kept, incoming|
       if sum_fields.include?(field)
         format("%.2f", kept.to_f + incoming.to_f)
       elsif max_fields.include?(field)
         [kept, incoming].max
       else
         kept
       end
     end
   end

   grouped.values
 end

 arr = [{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1", "pet"=>"dog" }, { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1", "sport"=>"darts" }, { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2" }] 

请注意,此数组与问题中给出的数组略有不同:我在第一个哈希中添加了 "pet"=>"dog",在第二个哈希中添加了 "sport"=>"darts"。

 f_addition(arr, ["order","order1"], ["money","money1"], ["id", "idx"] ) #=> [{ "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00", # "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts"}, # { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", # "order"=>"002", "order1"=>"2" }] 

说明

对于上面的例子:

 group_fields = ["order", "order1"]
 sum_fields = ["money", "money1"]
 max_fields = ["id", "idx"]
 enum = arr.each_with_object({})
   #=> #<Enumerator: [{"id"=>2, "idx"=>111,..., "pet"=>"dog"},
   #                  {"id"=>1, "idx"=>112,..., "sport"=>"darts"},
   #                  {"id"=>3,"idx"=>113,...,"order1"=>"2"}]:each_with_object({})>

Array#each 将枚举器的每个元素传递给块,并将其赋值给块变量。 传递的第一个元素是:

 h, g = enum.next #=> [{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1", "pet"=>"dog" }, {}] h #=> { "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1", "pet"=>"dog" } g #=> {} 

如:

 h.values_at(*group_fields) #=> h.values_at(*["order", "order1"]) #=> h.values_at("order", "order1") #=> ["001", "1"] 

我们计算:

 g.update(["001", "1"] => h) do |k,gv,hv| ... end 

这是简写​​:

 g.update({ ["001", "1"] => h }) do |k,gv,hv| ... end 

块 do |k,gv,hv| ... end 仅在被合并的两个哈希都包含键 k¹ 时才会被调用。由于 g = {} 不包含任何键,因此此时不使用该块:

 g.update({ ["001", "1"] => h }) #=> {}.update({ ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00", # "money1"=>"1.00", "order"=>"001", # "order1"=>"1", "pet"=>"dog" } } #=> { ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", # "order"=>"001", "order1"=>"1", "pet"=>"dog" } } 

update返回的值是g的新值。

传入块的enum的下一个值是:

 h, g = enum.next h #=> { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", # "order"=>"001", "order1"=>"1", "sport"=>"darts" }, g #=> { ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", # "order"=>"001", "order1"=>"1", "pet"=>"dog" } }] 

如:

 h.values_at(*group_fields) #=> h.values_at("order", "order1") #=> ["001", "1"] 

我们计算:

 g.update(["001", "1"] => h) do |k,gv,hv| ... end 

由于 g 和 { ["001", "1"] => h } 都包含键 ["001", "1"],我们必须借助该块来确定合并后的哈希中该键的值。 我们有:

 k = ["001", "1"] gv = { "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1", "pet"=>"dog" } hv = { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1", "sport"=>"darts" } 

因此,我们按如下方式评估块(使用merge而不是merge!/update ):

 gv.merge(hv) do |k,gvv,hvv| case when sum_fields.include?(k) then "%.2f" % (gvv.to_f + hvv.to_f) when max_fields.include?(k) then [gvv, hvv].max else gvv end end #=> { "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00", # "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts"} 

gv 不包含键 "sport",因此在将 "sport"=>"darts" 合并进 gv 时不使用该块。 然而,hv 的所有其他键都存在于 gv 中,因此我们使用该块来确定合并后的哈希中的值。 对于:

 k = "money" gvv = "4.00" hvv = "2.00" 

我们发现:

 sum_fields.include?(k) #=> ["money", "money1"].include?("money") #=> true 

所以case语句返回:

 "%.2f" % (gvv.to_f + hvv.to_f) #=> "%.2f" % ("4.00".to_f + "2.00".to_f) #=> "6.00" 

hv其他键的值(合并到gv的哈希值)类似地计算,以给出合并的哈希g的新值。

最后,

 { ["002", "2"] => { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2" } }

被合并到g (不需要使用update的块),并且该方法返回g.values

意见

很容易将其概括为传递对,例如:

 [["money","money1"], ->(a,b) { "%.2f" % (a.to_f + b.to_f) }] [["id", "idx"], :max] 

这可以如下完成:

 # Generalised aggregation: collapses the hashes in +arr+ that share the same
 # values for +group_fields+, combining colliding values according to +mods+.
 #
 # Each element of +mods+ is a pair <tt>[fields, op]</tt>. When a key listed in
 # +fields+ is present in both hashes being merged, the two values are combined
 # with +op+:
 # * any object responding to +call+ (lambda, proc, Method, ...) is invoked as
 #   <tt>op.call(kept, incoming)</tt>;
 # * a Symbol is sent to the two-element array <tt>[kept, incoming]</tt>
 #   (e.g. +:max+, +:min+, +:sum+) using +public_send+.
 # Keys not covered by any pair keep the value seen first. The first matching
 # pair wins when a key is listed more than once.
 #
 # The input hashes are never modified.
 #
 # @param arr [Array<Hash>] rows to aggregate
 # @param group_fields [Array] keys whose values identify a group
 # @param mods [Array<Array>] <tt>[fields, op]</tt> pairs as described above
 # @return [Array<Hash>] one hash per group, in order of first appearance
 # @raise [ArgumentError] if a matching +op+ is neither callable nor a Symbol
 #   (previously such a value silently produced +nil+)
 def f_addition(arr, group_fields, *mods)
   arr.each_with_object({}) do |h, g|
     g.update(h.values_at(*group_fields) => h) do |_, gv, hv|
       gv.merge(hv) do |k, gvv, hvv|
         rule = mods.find { |fields, _| fields.include?(k) }
         next gvv unless rule

         op = rule[1]
         if op.respond_to?(:call)
           op.call(gvv, hvv)
         elsif op.is_a?(Symbol)
           [gvv, hvv].public_send(op)
         else
           raise ArgumentError, "unsupported operation for #{k.inspect}: #{op.inspect}"
         end
       end
     end
   end.values
 end

 arr = [
   { "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1", "pet"=>"dog" },
   { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1", "sport"=>"darts" },
   { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2" }
 ]

 f_addition(arr, ["order", "order1"],
            [["money", "money1"], ->(a, b) { "%.2f" % (a.to_f + b.to_f) }],
            [["id", "idx"], :max])
   # => [{ "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00",
   #       "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts" },
   #     { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00",
   #       "order"=>"002", "order1"=>"2" }]
  ¹ 可以看到,update 的块中的计算并不依赖于块变量 `k`。
     因此我用下划线 `_` 代替了该变量,以提示读者它未被使用。

这应该与你提出的要求非常接近

 # Groups the hashes in +arr+ by the values of +group_fields+ and returns one
 # aggregated hash per group.
 #
 # The first hash of each group serves as the base (so keys that only occur in
 # later members of the group are not carried over). On top of it:
 # * every key in +sum_fields+ is replaced by the sum of the group's values,
 #   converted with +to_f+ and rendered as a two-decimal string;
 # * every key in +max_fields+ is replaced by the largest non-nil value found
 #   in the group (left untouched when no member has a value for it).
 #
 # The input hashes are never modified.
 #
 # @param arr [Array<Hash>] rows to aggregate
 # @param group_fields [Array] keys whose values identify a group
 # @param sum_fields [Array] keys to be summed
 # @param max_fields [Array] keys for which the maximum is kept
 # @return [Array<Hash>] one hash per group, in order of first appearance
 def f_addition(arr, group_fields, sum_fields, max_fields)
   arr.group_by { |h| h.values_at(*group_fields) }.values.map do |group|
     sums = sum_fields.each_with_object({}) do |field, acc|
       total = group.inject(0.0) { |memo, h| memo + h[field].to_f }
       acc[field] = format("%.2f", total)
     end

     maxes = max_fields.each_with_object({}) do |field, acc|
       candidates = group.map { |h| h[field] }.compact
       acc[field] = candidates.max unless candidates.empty?
     end

     # Hash#merge returns a new hash, so the caller's data stays intact.
     group.first.merge(sums).merge(maxes)
   end
 end

 arr = [
   {"id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1"},
   {"id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1"},
   {"id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2"}
 ]

 f_addition(arr, ["order","order1"], ["money","money1"], ["id", "idx"] )
 # [
 #   {"id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00", "order"=>"001", "order1"=>"1"},
 #   {"id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2"}
 # ]

我用过这段代码

 # Groups the hashes in +arr+ by the values of +group_fields+ and returns one
 # aggregated hash per group, based on a copy of the group's first hash.
 #
 # * Keys in +max_fields+ are set to the group's maximum. Only the values of
 #   these keys are coerced for comparison: strings that are valid base-10
 #   integers are compared (and returned) as Integers, so "12" beats "2";
 #   everything else is compared as-is.
 # * Keys in +sum_fields+ are set to the Float sum of the group's values
 #   (each converted with +to_f+).
 #
 # The input hashes are never modified.
 #
 # @param arr [Array<Hash>] rows to aggregate
 # @param group_fields [Array] keys whose values identify a group
 # @param sum_fields [Array] keys to be summed
 # @param max_fields [Array] keys for which the maximum is kept
 # @return [Array<Hash>] one hash per group, in order of first appearance
 def aggregate(arr, group_fields, sum_fields, max_fields)
   # Explicit base 10 so "010" means ten rather than octal eight; non-strings
   # are passed through so Floats are not truncated by Integer().
   to_comparable = lambda do |value|
     begin
       value.is_a?(String) ? Integer(value, 10) : value
     rescue ArgumentError
       value
     end
   end

   arr.group_by { |row| row.values_at(*group_fields) }.map do |_key, rows|
     result = rows.first.dup

     max_fields.each do |field|
       result[field] = rows.map { |row| to_comparable.call(row[field]) }.max
     end

     sum_fields.each do |field|
       result[field] = rows.inject(0) { |total, row| total + row[field].to_f }
     end

     result
   end
 end

这段代码有一点不好:它会把所有字段都转换为整数(转换为整数是为了能正确比较,例如 "id"=>"12" 与 "id"=>"2")。 其实只应转换 max_fields 中的字段,但我还没有想到解决办法。 转换代码是:

hashes.map! {|h| h.merge(h) { |k, v| Integer(v) rescue v }}

hashes.map! { |h| h.each_pair { |k, v| h[k] = Integer(v) rescue v } }

所以,如果有人能够解决这个弱点,那就太好了。