Wide Features

This example works with wide datasets that have many numeric columns. It selects feature columns by prefix, computes row-wise sums and weighted dot products, and sorts the rows by score.

(*---------------------------------------------------------------------------
  Copyright (c) 2026 The Raven authors. All rights reserved.
  SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open Talon

let () =
  (* Build a small wide table: an "id" string column followed by eight
     float64 feature columns, five rows each. *)
  let wide =
    create
      [
        ("id", Col.string_list [ "u1"; "u2"; "u3"; "u4"; "u5" ]);
        ("feat_1", Col.float64_list [ 1.; 4.; 2.; 3.; 1. ]);
        ("feat_2", Col.float64_list [ 0.; 1.; 1.; 1.; 2. ]);
        ("feat_3", Col.float64_list [ 3.; 0.; 1.; 2.; 0. ]);
        ("feat_4", Col.float64_list [ 5.; 2.; 0.; 1.; 3. ]);
        ("feat_5", Col.float64_list [ 2.; 2.; 2.; 2.; 2. ]);
        ("feat_6", Col.float64_list [ 1.; 0.; 1.; 0.; 1. ]);
        ("feat_7", Col.float64_list [ 0.5; 0.2; 0.1; 0.3; 0.4 ]);
        ("feat_8", Col.float64_list [ 10.; 9.; 7.; 13.; 8. ]);
      ]
  in

  (* Names of the columns selected by the "feat_" prefix. They are captured
     once, before any derived column is added, and reused for both
     aggregations below. *)
  let feature_names = Cols.with_prefix wide "feat_" in

  (* Eight weights, one per feature column.
     NOTE(review): assumes Cols.with_prefix yields the names in frame order,
     so that the i-th weight pairs with feat_i -- confirm. *)
  let weights = [| 0.1; 0.1; 0.1; 0.1; 0.1; 0.05; 0.05; 0.4 |] in

  (* Row-wise sum over the feature columns, stored as "row_sum". *)
  let row_totals = Row.Agg.sum ~skipna:true wide ~names:feature_names in
  let with_sum = add_column wide "row_sum" row_totals in

  (* Row-wise weighted dot product over the same columns, stored as
     "score". *)
  let scores = Row.Agg.dot with_sum ~names:feature_names ~weights in
  let with_score = add_column with_sum "score" scores in

  (* Sort on "score" with ~ascending:false, then print with ~max_rows:10. *)
  let ranked = sort_values ~ascending:false with_score "score" in
  print ~max_rows:10 ranked