05-reductions-and-statistics
Summarize data with reductions — means, variances, and aggregations along any axis. This example analyzes daily temperature readings across four cities.
dune exec nx/examples/05-reductions-and-statistics/main.exe
What You'll Learn
- Reducing along specific axes with
mean,std,sum - Finding extremes and their positions with
min,max,argmax - Computing running totals with
cumsum - Preserving dimensions for broadcasting with
keepdims - Detecting outliers using z-score normalization
- Testing conditions with
allandany
Key Functions
| Function | Purpose |
|---|---|
mean ~axes t |
Average values along specified axes |
std ~axes t |
Standard deviation along axes |
min t / max t |
Global minimum / maximum |
min ~axes t / max ~axes t |
Per-axis minimum / maximum |
argmax ~axis t |
Index of the maximum along an axis |
cumsum ~axis t |
Cumulative sum along an axis |
all t / any t |
Test if all / any elements are true |
greater_s t s |
Element-wise t > s returning a bool array |
less_s t s |
Element-wise t < s returning a bool array |
Output Walkthrough
The dataset is a 4×7 matrix — 4 cities, 7 days of temperature readings:
let city_means = mean ~axes:[ 1 ] temps in
City averages:
Paris mean=22.9 std=2.3
Cairo mean=32.0 std=2.1
Helsinki mean=-5.6 std=2.6
London mean=14.9 std=1.3
Axis semantics
~axes:[1]reduces across columns (days) → one value per city~axes:[0]reduces across rows (cities) → one value per day- No axis → reduces everything to a scalar
Outlier detection with z-scores
Using keepdims:true to broadcast the mean and std against the original data:
let mu = mean ~axes:[ 1 ] ~keepdims:true temps in
let sigma = std ~axes:[ 1 ] ~keepdims:true temps in
let z_scores = (temps - mu) / sigma in
let outlier_mask = greater_s (abs z_scores) 1.5
Condition testing
let all_above_zero = all (greater_s temps 0.0) in (* false — Helsinki *)
let any_below_neg5 = any (less_s temps (-5.0)) in (* true — Helsinki *)
Try It
- Compute the daily average across all cities with
mean ~axes:[0]and find which day was warmest on average. - Use
cumsum ~axis:1on the full temperature matrix to see running totals per city. - Find the day with the smallest temperature range across cities using
max ~axes:[0]minusmin ~axes:[0].
Next Steps
Continue to 06-random-numbers to generate synthetic data with controlled, reproducible distributions.
(** Summarize data with reductions — means, variances, and aggregations along
any axis.
Analyze daily temperature readings across four cities. Compute averages,
find extremes, track running totals, and flag outliers. *)
open Nx
open Nx.Infix
let () =
(* Daily temperatures (°C) for 4 cities over 7 days. Rows = cities, columns =
days. *)
let temps =
create float64 [| 4; 7 |]
[|
22.0;
24.0;
19.0;
25.0;
23.0;
21.0;
26.0;
(* Paris *)
30.0;
32.0;
35.0;
31.0;
29.0;
33.0;
34.0;
(* Cairo *)
-5.0;
-8.0;
-3.0;
-10.0;
-2.0;
-7.0;
-4.0;
(* Helsinki *)
15.0;
14.0;
16.0;
13.0;
17.0;
15.0;
14.0;
(* London *)
|]
in
let cities = [| "Paris"; "Cairo"; "Helsinki"; "London" |] in
Printf.printf "Daily temperatures (4 cities × 7 days):\n%s\n\n"
(data_to_string temps);
(* --- Per-city statistics (reduce along axis 1 = across days) --- *)
let city_means = mean ~axes:[ 1 ] temps in
let city_stds = std ~axes:[ 1 ] temps in
Printf.printf "City averages:\n";
for i = 0 to 3 do
Printf.printf " %-10s mean=%.1f std=%.1f\n" cities.(i)
(item [ i ] city_means) (item [ i ] city_stds)
done;
print_newline ();
(* --- Hottest day per city (argmax along axis 1) --- *)
let hottest_day = argmax ~axis:1 temps in
Printf.printf "Hottest day per city:\n";
for i = 0 to 3 do
Printf.printf " %-10s day %ld\n" cities.(i) (item [ i ] hottest_day)
done;
print_newline ();
(* --- Global extremes --- *)
Printf.printf "Warmest reading: %.1f°C\n" (item [] (max temps));
Printf.printf "Coldest reading: %.1f°C\n\n" (item [] (min temps));
(* --- Cumulative sum: running total of Cairo's temperatures --- *)
let cairo = temps.${[ I 1; A ]} in
let cumulative = cumsum ~axis:0 cairo in
Printf.printf "Cairo daily: %s\n" (data_to_string cairo);
Printf.printf "Cairo cumulative: %s\n\n" (data_to_string cumulative);
(* --- Outlier detection with z-scores --- *)
let mu = mean ~axes:[ 1 ] ~keepdims:true temps in
let sigma = std ~axes:[ 1 ] ~keepdims:true temps in
let z_scores = (temps - mu) / sigma in
let outlier_mask = greater_s (abs z_scores) 1.5 in
Printf.printf "Z-scores:\n%s\n" (data_to_string z_scores);
Printf.printf "Outliers (|z| > 1.5): %s\n\n" (data_to_string outlier_mask);
(* --- Check if all/any values meet a condition --- *)
let all_above_zero = all (greater_s temps 0.0) in
let any_below_neg5 = any (less_s temps (-5.0)) in
Printf.printf "All temps > 0? %b\n" (item [] all_above_zero);
Printf.printf "Any temp < -5? %b\n" (item [] any_below_neg5)