#!/usr/bin/perl

use Getopt::Std;

$curr_version = 6;

$YLD_BOTH_EMPTY		= 1;	$PT_GAINED_IDLE		= 16;
$YLD_ACT_EMPTY		= 2;	$PT_LOST_IDLE		= 17;
$YLD_EXP_EMPTY		= 3;	$PT_GAINED_NOTIDLE	= 18;
$YLD_CNT		= 4;	$PT_LOST_NOTIDLE	= 19;
$SCHED_NOSWITCH		= 5;	$ALB_CNT		= 20;
$SCHED_SWITCH		= 6;	$ALB_GAINED		= 21;
$SCHED_CNT		= 7;	$ALB_LOST		= 22;
$LB_IDLE		= 8;	$SBE_CNT		= 23;
$LB_BUSY		= 9;	$MTC_CNT		= 24;
$LB_CNT			= 10;	$LBNI_CNT		= 25;
$LB_IMBALANCE		= 11;	$LBNI_IMBALANCE		= 26;
$LB_NOBUSYG		= 12;	$CPU_CPUTIME		= 27;
$LB_NOBUSYQ		= 13;	$CPU_RUNDELAY		= 28;
$PT_GAINED_NEWIDLE	= 14;	$CPU_TRIPCNT		= 29;
$PT_LOST_NEWIDLE	= 15;

die "Usage: $0 [-t] [file]\n" unless &getopts("tc");

while (<>) {
    @curr = split;
    if ($curr[0] =~ /cpu(\d+)/) {
	$per_cpu_curr[$1] = [ @curr ];
	$max_cpu = $1 if ($1 > $max_cpu);
	next;
    }
    next if (/^$/);
    if ($curr[0] eq "version") {
	if ($curr[1] != $curr_version) {
	    die "$0: Version mismatch: input is version $curr[0] but this tool\nis for version $curr_version.\n";
	}
	next;
    }
    if ($curr[0] eq "timestamp") {
	$delta = $curr[1] - $otimestamp;
	$otimestamp = $curr[1];
	next;
    }

    #
    # format of line in /proc/schedstat
    #
    # tag 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 \
    #	  25 26 27 28 29
    #
    # tag is "cpuN" or "cpu".  Right now, we ignore "cpuN" lines (this tool
    # doesn't collate per-cpu statistics, although it would be trivial to
    # do so.)
    #
    # version == 6
    # NOTE: the active queue is considered empty if it has only one process
    #	in it, since obviously the process calling sched_yield is that process.
    #
    # First four are sched_yield statistics:
    #     1) # of times both the active and the expired queue were empty
    #     2) # of times just the active queue was empty
    #     3) # of times just the expired queue was empty
    #     4) # of times sched_yield() was called
    #
    # Next three are schedule() statistics:
    #     5) # of times the active queue had at least one other process on it.
    #     6) # of times we switched to the expired queue and reused it
    #     7) # of times schedule() was called
    #
    # Next six are statistics dealing with load balancing:
    #     8) # of times load_balance was called at an idle tick
    #     9) # of times load_balance was called at an busy tick
    #	 10) # of times load_balance was called
    #	 11) sum of imbalances discovered (if any) with each call to
    #        load_balance
    #	 12) # of times load_balance was called when we did not find a
    #	     "busiest" cpu group
    #	 13) # of times load_balance was called when we did not find a
    #	     "busiest" run queue
    #
    # Next six are statistics dealing with pull_task():
    #    14) # of times pull_task gave a task to this cpu when newly idle
    #    15) # of times pull_task took a task from this cpu when newly idle
    #    16) # of times pull_task gave a task to this cpu when idle
    #    17) # of times pull_task took a task from this cpu when idle
    #    18) # of times pull_task gave a task to this cpu when busy
    #    19) # of times pull_task took a task from this cpu when busy
    #
    # Next three are statistics dealing with active_load_balance():
    #    20) # of times active_load_balance() was called
    #    21) # of times active_load_balance() caused this cpu to gain a task
    #    22) # of times active_load_balance() caused this cpu to lose a task
    #
    # Next two are function call counters:
    #    23) # of times sched_balance_exec() was called
    #    24) # of times migrate_to_cpu() was called
    #
    # Next two are dealing with load_balance_newidle():
    #    25) # of times load_balance_newidle was called
    #	 26) sum of imbalances discovered (if any) with each call to
    #        load_balance_newidle()
    #
    # Next three are statistics dealing with scheduling latency:
    #	 27) sum of all time spent running by tasks on this processor (in ms)
    #	 28) sum of all time spent waiting to run by tasks on this processor
    #	     (in ms)
    #	 29) # of tasks (not necessarily unique) given to the processor
    #

    if (defined(@prev)) {

	foreach $i (1..29) {
	    $diff[$i] = $curr[$i] - $prev[$i];
	}

	for ($cpu = 0; $cpu <= $max_cpu; $cpu++) {
	    @arr_curr = @{$per_cpu_curr[$cpu]};
	    @arr_prev = @{$per_cpu_prev[$cpu]};
	    foreach $i (1..29) {
		$arr_diff[$i] = $arr_curr[$i] - $arr_prev[$i];
	    }
	    $per_cpu_diff[$cpu] = [ @arr_diff ];
	}
    }

    $timestart = $delta if (!$timestart);
    $timestamp += $delta;

    if (!$opt_t) {
	print_diffs();
	@prev = @curr;
	@per_cpu_prev = @per_cpu_curr;
    } else {
	if (!defined(@prev)) {
	    @prev = @curr;
	    @per_cpu_prev = @per_cpu_curr;
	}
    }
}

print_diffs() if ($opt_t);

sub print_diffs {
    printf "%02d:%02d:%02d--------------------------------------------------------------\n",
	($timestamp-$timestart)/3600000,
	(($timestamp-$timestart)/60000)%60,
	(($timestamp-$timestart)/1000)%60;

    #
    # sched_yield() stats
    #
    printf "    %7d          sys_sched_yield()\n", $diff[$YLD_CNT];
    printf "    %7d(%6.2f%%) found (only) active queue empty on current cpu\n",
	$diff[$YLD_ACT_EMPTY]-$diff[$YLD_BOTH_EMPTY],
	$diff[$YLD_CNT] ?
	    (100*($diff[$YLD_ACT_EMPTY]-$diff[$YLD_BOTH_EMPTY])/
		$diff[$YLD_CNT]) : 0;
    printf "    %7d(%6.2f%%) found (only) expired queue empty on current cpu\n",
	$diff[$EXP_EMPTY],
	$diff[$YLD_CNT] ? (100*$diff[$EXP_EMPTY]/$diff[$YLD_CNT]) : 0;
    printf "    %7d(%6.2f%%) found both queues empty on current cpu\n",
	$diff[$YLD_BOTH_EMPTY],
	$diff[$YLD_CNT] ? (100*$diff[$YLD_BOTH_EMPTY]/$diff[$YLD_CNT]) : 0;
    printf "    %7d(%6.2f%%) found neither queue empty on current cpu\n\n",
	$diff[$YLD_CNT]-($diff[$EXP_EMPTY]+$diff[$YLD_ACT_EMPTY]),
	$diff[$YLD_CNT] ?
	    100*($diff[$YLD_CNT]-($diff[$EXP_EMPTY]+$diff[$YLD_ACT_EMPTY]))/
		$diff[$YLD_CNT] : 0;

    #
    # schedule() stats
    #
    printf "    %7d          schedule()\n", $diff[$SCHED_CNT];
    printf "    %7d(%6.2f%%) switched active and expired queues\n",
	$diff[$SCHED_SWITCH], $diff[$SCHED_CNT] ? (100*$diff[$SCHED_SWITCH]/$diff[$SCHED_CNT]) : 0;
    printf "    %7d(%6.2f%%) used existing active queue\n\n",
	$diff[$SCHED_NOSWITCH]-$diff[$SCHED_SWITCH], $diff[$SCHED_CNT] ? (100*($diff[$SCHED_NOSWITCH]-$diff[$SCHED_SWITCH])/$diff[$SCHED_CNT]) : 0;
    #
    # this calculation is broken
    #
    #printf "    %7d(%6.2f%%) processor went idle\n\n",
	$diff[$SCHED_CNT] - $diff[$SCHED_SWITCH], $diff[$SCHED_CNT] ? (100*($diff[$SCHED_CNT] - $diff[$SCHED_SWITCH])/$diff[$SCHED_CNT]) : 0;

    #
    # load_balance() stats
    #
    printf "    %7d          load_balance()\n", $diff[$LB_CNT];
    printf "    %7d(%6.2f%%) called while idle\n", $diff[$LB_IDLE],
	$diff[$LB_CNT] ? 100*$diff[$LB_IDLE]/$diff[$LB_CNT] : 0;
    printf "    %7d(%6.2f%%) called while busy\n", $diff[$LB_BUSY],
	$diff[$LB_CNT] ? 100*($diff[$LB_BUSY])/$diff[$LB_CNT] : 0;
    printf "    %7d(%6.2f%%) no \"busiest group\" found\n", $diff[$LB_NOBUSYG],
	$diff[$LB_CNT] ? 100*($diff[$LB_NOBUSYG])/$diff[$LB_CNT] : 0;
    printf "    %7d(%6.2f%%) no \"busiest queue\" found\n", $diff[$LB_NOBUSYQ],
	$diff[$LB_CNT] ? 100*($diff[$LB_NOBUSYQ])/$diff[$LB_CNT] : 0;
    if ($diff[$LB_CNT]-$diff[$LB_NOBUSYG]-$diff[$LB_NOBUSYQ]) {
    #printf "    imbalance=%d, count=%d\n", $diff[$LB_IMBALANCE],
	    ($diff[$LB_CNT]-$diff[$LB_NOBUSYG]-$diff[$LB_NOBUSYQ]);
	$imbalance = $diff[$LB_IMBALANCE] /
	    ($diff[$LB_CNT]-$diff[$LB_NOBUSYG]-$diff[$LB_NOBUSYQ]);
	if ($imbalance < 10) {
	    $fmt = "%7.3f";
	} elsif ($imbalance < 100) {
	    $fmt = "%8.2f";
	} else {
	    $fmt = "%9.1f";
	}
	printf "             $fmt average imbalance (over %d)\n",
	    $imbalance, $diff[$LB_CNT]-$diff[$LB_NOBUSYG]-$diff[$LB_NOBUSYQ];
    }
    else {
	printf "                     no imbalances\n";
    }

    #
    # pull_task() stats
    #
    print "\n";
    $total = $idle = $busy = $alb = $newidle = 0;
    for ($cpu = 0; $cpu <= $max_cpu; $cpu++) {
	@arr = @{$per_cpu_diff[$cpu]};
	$total += ($arr[$PT_GAINED_NEWIDLE] + $arr[$PT_GAINED_IDLE]
	    + $arr[$PT_GAINED_NOTIDLE] + $arr[$ALB_GAINED]);
	$newidle += $arr[$PT_GAINED_NEWIDLE];
	$idle += $arr[$PT_GAINED_IDLE];
	$busy += $arr[$PT_GAINED_NOTIDLE];
	$alb += $arr[$ALB_GAINED];
    }
    printf "    %7d          pull_task()\n", $total;
    printf "    %7d          total tasks moved between cpus\n", $total;
    if ($opt_c) {
	for ($cpu = 0; $cpu <= $max_cpu; $cpu++) {
	    @arr = @{$per_cpu_diff[$cpu]};
	    if ($arr[$PT_GAINED_NEWIDLE] || $arr[$PT_LOST_NEWIDLE]) {
		printf "    %7d/%-7d  cpu %2d lost/gained task to/from another cpu when newly idle\n",
		    $arr[$PT_GAINED_NEWIDLE], $arr[$PT_LOST_NEWIDLE], $cpu;
	    }
	    if ($arr[$PT_GAINED_IDLE] || $arr[$PT_LOST_IDLE]) {
		printf "    %7d/%-7d  cpu %2d lost/gained task to/from another cpu while idle\n",
		    $arr[$PT_GAINED_IDLE], $arr[$PT_LOST_IDLE], $cpu;
	    }
	    if ($arr[$PT_GAINED_NOTIDLE] || $arr[$PT_LOST_NOTIDLE]) {
		printf "    %7d/%-7d  cpu %2d lost/gained task to/from another cpu when busy\n",
		    $arr[$PT_GAINED_NOTIDLE], $arr[$PT_LOST_NOTIDLE], $cpu;
	    }
	    if ($arr[$ALB_GAINED] || $arr[$ALB_LOST]) {
		printf "    %7d/%-7d  cpu %2d lost/gained task to/from another cpu from active_load_balance()\n",
		    $arr[$ALB_GAINED], $arr[$ALB_LOST], $cpu;
	    }
	}
    } else {
	printf "    %7d(%6.2f%%) moved when newly idle\n",
	    $newidle, $total ? 100*($newidle/$total) : 0;
	printf "    %7d(%6.2f%%) moved while idle\n",
	    $idle, $total ? 100*($idle/$total) : 0;
	printf "    %7d(%6.2f%%) moved while busy\n",
	    $busy, $total ? 100*($busy/$total) : 0;
	printf "    %7d(%6.2f%%) moved from active_load_balance()\n",
	    $alb, $total ? 100*($alb/$total) : 0;
    }
    print "\n";

    #
    # active_load_balance() stats
    #
    printf "    %7d          active_load_balance()\n", $diff[$ALB_CNT];

    #
    # function call counts
    #
    printf "    %7d          sched_balance_exec()\n", $diff[$SBE_CNT];
    printf "    %7d          migrate_to_cpu()\n", $diff[$MTC_CNT];
    printf("\n");

    #
    # load_balance_newidle() stats
    #
    printf "    %7d          load_balance_newidle()\n", $diff[$LBNI_CNT];
    if ($diff[$LBNI_CNT]) {
	$imbalance = $diff[$LBNI_IMBALANCE] / $diff[$LBNI_CNT];
	if ($imbalance < 10) {
	    $fmt = "%7.3f";
	} elsif ($imbalance < 100) {
	    $fmt = "%8.2f";
	} else {
	    $fmt = "%9.1f";
	}
	printf "             $fmt average imbalance\n", $imbalance;
    }
    else {
	printf "                     no imbalances\n";
    }

    printf("\n");

    #
    # latency stats
    #
    printf "                     Latency\n";
    for ($cpu = 0; $cpu <= $max_cpu; $cpu++) {
	@arr = @{$per_cpu_diff[$cpu]};
	if ($arr[$CPU_TRIPCNT] && ($arr[$CPU_CPUTIME] || $arr[$CPU_RUNDELAY])) {
	    printf "    %6.2f/%-6.2f    avg runtime/latency on cpu %d (ms)\n",
		$arr[$CPU_CPUTIME]/$arr[$CPU_TRIPCNT],
		$arr[$CPU_RUNDELAY]/$arr[$CPU_TRIPCNT], $cpu;
	}
    }

    printf("\n");
}
