Module Talon.Cols
Column selection utilities for working with subsets of columns.
These functions provide convenient ways to select columns by type, name patterns, or other criteria. They return lists of column names that can be used with functions like select, drop_columns, or row-wise aggregations.
val numeric : t -> string list
numeric df returns all numeric column names.
Includes float32, float64, int32, and int64 columns. Useful for operations that work on any numeric type.
Example:
let nums = Cols.numeric df in
let stats = select df nums |> describe
val float : t -> string list
float df returns all floating-point column names.
Includes float32 and float64 columns only, excluding integer types.
val int : t -> string list
int df returns all integer column names.
Includes int32 and int64 columns only, excluding floating-point types.
val bool : t -> string list
bool df returns all boolean column names.
val string : t -> string list
string df returns all string column names.
val matching : t -> Re.re -> string list
matching df regex returns column names matching the regex pattern.
Uses the Re library for pattern matching. Useful for selecting columns with systematic naming patterns.
Example:
let numeric_cols = Cols.matching df (Re.Posix.compile_pat ".*_[0-9]+") in
(* Selects columns like "feature_1", "score_99", etc. *)
val with_prefix : t -> string -> string list
with_prefix df prefix returns column names starting with prefix.
Example:
let temp_cols = Cols.with_prefix df "temp_" in
(* Selects "temp_morning", "temp_evening", etc. *)
val with_suffix : t -> string -> string list
with_suffix df suffix returns column names ending with suffix.
Example:
let score_cols = Cols.with_suffix df "_score" in
(* Selects "math_score", "reading_score", etc. *)
val except : t -> string list -> string list
except df exclude returns all column names except those in the exclude list.
Useful for selecting "everything but" a few specific columns.
Example:
let features = Cols.except df ["id"; "target"] in
(* All columns except "id" and "target" *)
val select_dtypes :
t ->
[ `Numeric | `Float | `Int | `Bool | `String ] list ->
string list
select_dtypes df types returns column names of the specified types.
Type categories:
- `Numeric: All numeric types (float32, float64, int32, int64)
- `Float: Floating-point types (float32, float64)
- `Int: Integer types (int32, int64)
- `Bool: Boolean columns
- `String: String columns
Example:
let numeric_and_bool = Cols.select_dtypes df [`Numeric; `Bool] in
(* Includes all numeric columns plus boolean columns *)