#!/usr/bin/perl

use strict;
use warnings;

#
# Reads one or more concatenated /proc/schedstat (version 4) samples from
# the files named on the command line (or stdin) and, for every aggregate
# line, prints the change in each scheduler counter since the previous
# sample.
#

# Assumed length of one sampling interval, in seconds.  Only used to
# synthesize a clock when the input carries no "timestamp" lines.
my $slice = 20;

# Number of counters that follow the tag on each statistics line.
my $field_count = 23;

my @prev;		# fields of the previous aggregate line
my @per_cpu_curr;	# [cpu] -> ref to fields of the latest "cpuN" line
my @per_cpu_prev;	# [cpu] -> same, as of the previous report
my $max_cpu = 0;	# highest cpu number seen so far
my $delta;		# change between the last two "timestamp" values;
			# stays undef until a "timestamp" line is seen
my $otimestamp = 0;	# value of the previous "timestamp" line
my $timestamp = 0;	# running clock (formatted below as milliseconds)
my $timestart = 0;	# clock value at the first report
my $tick = 0;		# number of reports printed so far

# Return $value, or 0 when it is undefined (short line, first sample).
sub _num {
    my ($value) = @_;
    return defined $value ? $value : 0;
}

# Return $part as a percentage of $whole, or 0 when $whole is zero.
sub _pct {
    my ($part, $whole) = @_;
    return $whole ? 100 * $part / $whole : 0;
}

# Return a list whose elements 1..$field_count are the differences between
# the corresponding fields of the two rows (array refs).  Element 0 (the
# tag position) is a placeholder and carries no meaning.
sub _field_deltas {
    my ($now, $before) = @_;
    my @change = (0);
    for my $i (1 .. $field_count) {
	$change[$i] = _num($now->[$i]) - _num($before->[$i]);
    }
    return @change;
}

while (<>) {
    my @curr = split;

    # Blank and whitespace-only lines carry no data.  Letting them through
    # would print a bogus report and wipe out @prev.
    next unless @curr;

    # Per-cpu line: remember it; it is consumed when the next aggregate
    # line triggers a report.
    if ($curr[0] =~ /cpu(\d+)/) {
	my $cpu = $1;
	$per_cpu_curr[$cpu] = [ @curr ];
	$max_cpu = $cpu if ($cpu > $max_cpu);
	next;
    }
    if ($curr[0] eq "version") {
	if (_num($curr[1]) != 4) {
	    die "Version mismatch. Update this tool.\n";
	}
	next;
    }
    if ($curr[0] eq "timestamp") {
	my $stamp = _num($curr[1]);
	$delta = $stamp - $otimestamp;
	$otimestamp = $stamp;
	next;
    }

    #
    # Anything that reaches this point is treated as the aggregate line.
    #
    # format of line in /proc/schedstat
    #
    # tag 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
    #
    # tag is "cpuN" or "cpu".  "cpuN" lines are collected above and used
    # for the per-cpu parts of the report (pull_task and latency); every
    # other section is computed from the aggregate line.
    #
    # version == 4
    # NOTE: the active queue is considered empty if it has only one process
    #	in it, since obviously the process calling sched_yield is that process.
    #
    # First four are sched_yield statistics:
    #     1) # of times both the active and the expired queue were empty
    #     2) # of times the active queue was empty (the report subtracts
    #        field 1 from this to get "only active empty", i.e. it treats
    #        this count as including the both-empty case)
    #     3) # of times just the expired queue was empty
    #     4) # of times sched_yield() was called
    #
    # Next three are schedule() statistics:
    #     5) # of times the active queue had at least one other process on it.
    #     6) # of times we switched to the expired queue and reused it
    #     7) # of times schedule() was called
    #
    # Next seven are statistics dealing with load balancing:
    #     8) # of times load_balance was called at an idle tick
    #     9) # of times load_balance was called at a busy tick
    #    10) # of times load_balance was called from schedule()
    #    11) # of times load_balance was called
    #    12) sum of imbalances discovered (if any) with each call to
    #        load_balance
    #    13) # of times load_balance was called when we did not find a
    #        "busiest" queue
    #    14) # of times load_balance was called from balance_node()
    #
    # Next four are statistics dealing with pull_task():
    #    15) # of times pull_task gave a task to this cpu
    #    16) # of times pull_task took a task from this cpu
    #    17) # of times pull_task gave a task to this node
    #    18) # of times pull_task took a task from this node
    #
    # Next two are statistics dealing with balance_node():
    #    19) # of times balance_node was called
    #    20) # of times balance_node was called at an idle tick
    #
    # Next three are statistics dealing with scheduling latency:
    #    21) sum of all time spent running by tasks on this processor (in ms)
    #    22) sum of all time spent waiting to run by tasks on this processor
    #        (in ms)
    #    23) # of tasks (not necessarily unique) given to the processor
    #

    my @diff = _field_deltas(\@curr, \@prev);

    # A cpu number that never appeared in the input has no row; treat it
    # as all zeroes rather than dereferencing undef.
    my @per_cpu_diff;
    for my $cpu (0 .. $max_cpu) {
	$per_cpu_diff[$cpu] = [
	    _field_deltas($per_cpu_curr[$cpu] || [], $per_cpu_prev[$cpu] || [])
	];
    }

    # Without any "timestamp" line, fabricate a clock from the report
    # count.  A delta of 0 (two samples with the same timestamp) is still
    # a real timestamp, hence the defined() test.
    if (!defined $delta) {
	$timestamp = $tick*$slice*1000;
	$timestart = 0;
    } else {
	$timestart = $delta if (!$timestart);
	$timestamp += $delta;
    }
    my $elapsed = $timestamp - $timestart;
    printf "%02d:%02d:%02d--------------------------------------------------------------\n",
	$elapsed/3600000,
	($elapsed/60000)%60,
	($elapsed/1000)%60;

    #
    # sched_yield() stats
    #
    my $yields = $diff[4];
    my $only_active = $diff[2]-$diff[1];
    my $neither = $diff[4]-($diff[3]+$diff[2]);
    printf "    %7d          sys_sched_yield()\n", $yields;
    printf "    %7d(%6.2f%%) found (only) active queue empty on current cpu\n",
	$only_active, _pct($only_active, $yields);
    printf "    %7d(%6.2f%%) found (only) expired queue empty on current cpu\n",
	$diff[3], _pct($diff[3], $yields);
    printf "    %7d(%6.2f%%) found both queues empty on current cpu\n",
	$diff[1], _pct($diff[1], $yields);
    printf "    %7d(%6.2f%%) found neither queue empty on current cpu\n\n",
	$neither, _pct($neither, $yields);

    #
    # schedule() stats
    #
    my $schedules = $diff[7];
    my $used_active = $diff[5]-$diff[6];
    my $went_idle = $diff[7] - $diff[5];
    printf "    %7d          schedule()\n", $schedules;
    printf "    %7d(%6.2f%%) switched active and expired queues\n",
	$diff[6], _pct($diff[6], $schedules);
    printf "    %7d(%6.2f%%) used existing active queue\n",
	$used_active, _pct($used_active, $schedules);
    printf "    %7d(%6.2f%%) processor went idle\n\n",
	$went_idle, _pct($went_idle, $schedules);

    #
    # load_balance() stats
    #
    my $balances = $diff[11];
    printf "    %7d          load_balance()\n", $balances;
    printf "    %7d(%6.2f%%) called while idle\n", $diff[8],
	_pct($diff[8], $balances);
    printf "    %7d(%6.2f%%) called while busy\n", $diff[9],
	_pct($diff[9], $balances);
    printf "    %7d(%6.2f%%) called from schedule()\n", $diff[10],
	_pct($diff[10], $balances);
    printf "    %7d(%6.2f%%) called from balance_node()\n", $diff[14],
	_pct($diff[14], $balances);
    printf "             %7d no \"busiest\" queue found\n",$diff[13];

    # Average the imbalance only over the calls that found a busiest queue.
    my $with_busiest = $diff[11]-$diff[13];
    if ($with_busiest) {
	my $imbalance = $diff[12] / $with_busiest;
	# Fewer decimals as the value grows, so the column width stays put.
	if ($imbalance < 10) {
	    printf "             %7.3f average imbalance (over %d)\n",
		$imbalance, $with_busiest;
	} elsif ($imbalance < 100) {
	    printf "            %8.2f average imbalance (over %d)\n",
		$imbalance, $with_busiest;
	} else {
	    printf "           %9.1f average imbalance (over %d)\n",
		$imbalance, $with_busiest;
	}
    }
    else {
	printf "                     no imbalances\n";
    }

    #
    # pull_task() stats
    #
    print "\n";
    my ($total, $ntotal) = (0, 0);
    for my $row (@per_cpu_diff) {
	$total += $row->[15];
	$ntotal += $row->[17];
    }
    printf "    %7d          pull_task()\n", $total;
    printf "    %7d          total tasks moved between cpus\n", $total;
    for my $cpu (0 .. $max_cpu) {
	# Field 15 counts tasks given to this cpu (gained) and field 16
	# tasks taken from it (lost); print them in the order the label
	# promises: lost first, then gained.
	my ($gained, $lost) = @{$per_cpu_diff[$cpu]}[15, 16];
	if ($gained || $lost) {
	    printf "      %7d/%-7d  cpu %2d lost/gained task to/from another cpu\n",
		$lost, $gained, $cpu;
	}
    }
    printf "    %7d          total tasks moved between nodes\n", $ntotal;
    for my $cpu (0 .. $max_cpu) {
	# Same ordering for the node-level counters (17 gained, 18 lost).
	my ($gained, $lost) = @{$per_cpu_diff[$cpu]}[17, 18];
	if ($gained || $lost) {
	    printf "      %7d/%-7d  cpu %2d lost/gained task to/from another node\n",
		$lost, $gained, $cpu;
	}
    }
    print "\n";

    #
    # balance_node() stats
    #
    my $node_balances = $diff[19];
    my $node_busy = $diff[19] - $diff[20];
    printf "    %7d          balance_node()\n", $node_balances;
    printf "    %7d(%6.2f%%) called while idle\n", $diff[20],
	_pct($diff[20], $node_balances);
    printf "    %7d(%6.2f%%) called while busy\n", $node_busy,
	_pct($node_busy, $node_balances);
    printf("\n");

    #
    # latency stats
    #
    printf "                     Latency\n";
    for my $cpu (0 .. $max_cpu) {
	my ($run, $wait, $tasks) = @{$per_cpu_diff[$cpu]}[21, 22, 23];
	# Skip cpus that were handed no tasks or accumulated no time.
	if ($tasks && ($run || $wait)) {
	    printf "    %6.2f/%-6.2f    avg runtime/latency on cpu %d (ms)\n",
		$run/$tasks, $wait/$tasks, $cpu;
	}
    }

    printf("\n");

    # The current sample becomes the baseline for the next report.  Each
    # per-cpu row is a fresh anonymous array, so copying the refs is safe.
    @prev = @curr;
    @per_cpu_prev = @per_cpu_curr;
    $tick++;
}
