/*
 * Very simple "sar -d"-style disk performance measuring tool.
 *
 * Requires kernel performance monitoring patch for new /proc/partitions
 * fields.  
 *
 * This file may be redistributed under the terms of the GNU General
 * Public License.  
 *
 *	Changes
 *	- short names for devfs [dpg 2000/04/06]
 */

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <ctype.h>
#include <sys/fcntl.h>
#include <sys/time.h>

#include <string>
#include <map>

// Standard-library names that this file uses without qualification.
using std::map;
using std::string;

class Disk;

// Kernel files holding the statistics.  String literals are const, so they
// may only be bound to pointers-to-const.
const char * partition_file = "/proc/partitions";
const char * stat_file = "/proc/stat";	// not referenced by the code in this file

// Times of the previous and the current sample, and the number of
// milliseconds between them; all three are maintained by read_delta().
struct timeval oldtime, newtime;
unsigned long delta;

int partition_fd;	// descriptor for partition_file, opened by main()
int pagesize;		// size in bytes of the scratch buffer

bool include_partitions = false;	// -p: also report devices flagged as partitions
bool quiet = false;			// -q: hide devices that have shown no activity

char *progname;		// argv[0], shown by usage()
char *scratch;		// buffer that partition_file is read into


// Every device seen so far, keyed by the name read from partition_file.
map<string, Disk*> disks;

/*
 * One sample of the per-device counters parsed from a line of the partition
 * file.  The counters are cumulative; Stats::difference() reports the change
 * between two samples.
 */
struct Stats
{
	Stats();

	// Print one report line for `disk` from the change between the two
	// samples.
	static void difference(Disk &disk,
			       const Stats &new_stats,
			       const Stats &old_stats);

	// Third field of the line; devices where it is zero are never reported.
	unsigned long nr_blocks;

	// Whole-device counters.
	unsigned int ios_in_flight;	// parsed, but not used by difference()
	unsigned int io_ticks;		// reported relative to the interval as % utilisation
	unsigned int aveq;		// reported relative to the interval as queue length

	// Read side.
	unsigned int rd_ios;		// requests
	unsigned int rd_merges;
	unsigned int rd_ticks;		// summed with wr_ticks to give the wait per request
	unsigned int rd_sectors;	// halved to give kB in the report

	// Write side, same meaning as the read counters.
	unsigned int wr_ios;
	unsigned int wr_merges;
	unsigned int wr_ticks;
	unsigned int wr_sectors;
};

/*
 * A device listed in the partition file, together with the most recent
 * sample of its counters.
 */
class Disk
{
public:
	// Arguments: device name, major number, minor number.
	Disk(std::string, int, int);
	
	std::string	name;		// name exactly as read from the partition file
	std::string	short_name;	// name printed in reports; abbreviated for long names
	int	major_nr;
	int	minor_nr;

	bool	is_partition;		// set when the name ends in a decimal digit
	bool	seen_activity;		// set once an interval with completed I/O was seen
	
	Stats	stats;			// counters from the previous sample
};


/*
 * Start with every counter at zero.  nr_blocks is included so that a
 * default-constructed Stats never holds an indeterminate value.
 */
Stats::Stats()
	: nr_blocks(0),
	  ios_in_flight(0),
	  io_ticks(0),
	  aveq(0),
	  rd_ios(0),
	  rd_merges(0),
	  rd_ticks(0),
	  rd_sectors(0),
	  wr_ios(0),
	  wr_merges(0),
	  wr_ticks(0),
	  wr_sectors(0)
{}

/*
 * Print one report line for a device from the change between two samples.
 *
 *   disk       device being reported; its seen_activity flag is set as soon
 *              as an interval contains completed requests
 *   new_stats  counters from the current sample
 *   old_stats  counters from the previous sample
 *
 * Reads the globals `delta` (milliseconds between the samples) and `quiet`.
 * Nothing is printed for a device that has never completed a request, that
 * reports zero blocks, that is still idle in quiet mode, or when the
 * interval is zero milliseconds long (no rate can be computed then).
 */
void Stats::difference(Disk &disk, 
		       const Stats &new_stats, 
		       const Stats &old_stats)
{
	if (!new_stats.rd_ios && !new_stats.wr_ios)
		return;
	if (!new_stats.nr_blocks)
		return;

	// The subtraction is done on unsigned values, so it stays correct
	// modulo 2^N if a counter wrapped between the two samples.
	const int io_ticks   = new_stats.io_ticks   - old_stats.io_ticks;
	const int aveq       = new_stats.aveq       - old_stats.aveq;
	const int rd_ios     = new_stats.rd_ios     - old_stats.rd_ios;
	const int rd_ticks   = new_stats.rd_ticks   - old_stats.rd_ticks;
	const int rd_sectors = new_stats.rd_sectors - old_stats.rd_sectors;
	const int wr_ios     = new_stats.wr_ios     - old_stats.wr_ios;
	const int wr_ticks   = new_stats.wr_ticks   - old_stats.wr_ticks;
	const int wr_sectors = new_stats.wr_sectors - old_stats.wr_sectors;

	const int nr_ios   = rd_ios   + wr_ios;
	const int nr_ticks = rd_ticks + wr_ticks;

	if (nr_ios)
		disk.seen_activity = true;
	if (!disk.seen_activity && quiet)
		return;

	// Every figure below is divided by the interval; a zero interval would
	// be an integer division by zero.
	if (!delta)
		return;

	// Throughput is computed in 64-bit arithmetic: sectors * 1000 no longer
	// fits an int once about 1 GB moves in one interval.  Two sectors make
	// one kB.
	const long long interval_ms = static_cast<long long>(delta);
	const long long rd_kb = static_cast<long long>(rd_sectors) * 1000
				/ interval_ms / 2;
	const long long wr_kb = static_cast<long long>(wr_sectors) * 1000
				/ interval_ms / 2;
	const long long nr_kb = (static_cast<long long>(rd_sectors) + wr_sectors)
				* 1000 / interval_ms / 2;

	float avwait;
	if (nr_ios)
		avwait = (float) nr_ticks / (float) nr_ios;
	else
		avwait = 0;

	// NOTE(review): the +0.5 added to each request count is kept from the
	// original code; presumably it was meant as rounding -- confirm.
	printf ("%-8s %6.2f%% %6.2f  %3.0f:%-6lld %3.0f:%-6lld %3.0f:%-6lld %5.1f\n",
		disk.short_name.c_str(),
		100.0 * (float) io_ticks / (float) delta, // % utilisation
		(float) aveq / (float) delta,	          // ave q len
		((float) rd_ios + 0.5) * 1000 / delta,	  // ios/sec read
		rd_kb,					  // kb/sec read
		((float) wr_ios + 0.5) * 1000 / delta,	  // ios/sec write
		wr_kb,					  // kb/sec write
		((float) nr_ios + 0.5) * 1000 / delta,	  // ios/sec combined
		nr_kb,					  // kb/sec combined
		avwait);				  // ms/req combined
}

/*
 * Helper for Disk::Disk: find `tag` in `name` and parse the decimal number
 * that immediately follows it into `value`.  Returns false when the tag is
 * absent or is not followed by a number.
 */
static bool devfs_number(const std::string &name,
			 const std::string &tag,
			 int &value)
{
	const std::string::size_type at = name.find(tag);
	if (at == std::string::npos)
		return false;
	return 1 == sscanf(name.c_str() + at + tag.length(), "%d", &value);
}

/*
 * Describe a device read from the partition file.
 *
 *   name      device name as it appears in the file
 *   major_nr  major device number
 *   minor_nr  minor device number
 *
 * A name ending in a decimal digit is flagged as a partition.
 * NOTE(review): whole devices whose names end in a digit are flagged as
 * partitions too -- confirm this is acceptable for the intended systems.
 *
 * Names longer than 20 characters that contain "host", "bus", "target" and
 * "lun" numbers (plus "part" for partitions) get a short name of the form
 * cHbBtTuL[pP]; every other name is used unchanged as the short name.
 */
Disk::Disk(std::string name, int major_nr, int minor_nr) 
	: name(name),
	  short_name(name),
	  major_nr(major_nr),
	  minor_nr(minor_nr),
	  is_partition(false),
	  seen_activity(false)
{
	const std::string::size_type len = name.length();
	if (len == 0)
		return;		// no last character to inspect

	const char last = name[len - 1];
	is_partition = (last >= '0' && last <= '9');

	if (len <= 20)
		return;

	// Any missing or malformed component leaves the full name in place.
	int h, b, t, l, p;
	if (!devfs_number(name, "host", h))
		return;
	if (!devfs_number(name, "bus", b))
		return;
	if (!devfs_number(name, "target", t))
		return;
	if (!devfs_number(name, "lun", l))
		return;

	char buff[128];
	if (is_partition) {
		if (!devfs_number(name, "part", p))
			return;
		snprintf(buff, sizeof buff, "c%db%dt%du%dp%d", h, b, t, l, p);
	} else {
		snprintf(buff, sizeof buff, "c%db%dt%du%d", h, b, t, l);
	}
	short_name = buff;
}

	
/*
 * Terminate the program when `error` is non-zero: `why` is passed to
 * perror() and the process exits with status 1.  Returns normally when
 * `error` is zero.
 */
void check(const char *why, int error)
{
	if (error) {
		perror(why);
		exit(1);
	}
}

/*
 * Advance `index` to the first character of the next non-empty line of the
 * NUL-terminated buffer `string`.
 *
 * The rest of the current line is skipped, followed by every consecutive
 * newline.  `index` stops at the terminating NUL when no further line
 * exists.  The buffer passed in is the one that is scanned.
 */
void advance_nl(char *string, int &index)
{
	while (string[index] && string[index] != '\n')
		index++;
	while (string[index] == '\n')
		index++;
}

void read_partition()
{
	int c;
	int items;
	int size;
	int major_nr, minor_nr;
	char namebuf[64];
	Disk *disk;
	Stats stats;
	map<string, Disk *>::iterator i;
	
	check ("rewind partition",
	       lseek(partition_fd, 0, SEEK_SET) != 0);
	check ("read partition",
	       (size = read(partition_fd, scratch, pagesize-1)) == 0);

	c = 0;
	scratch[size] = 0;

	while (scratch[c]) {
		int dummy;

		items = sscanf(&scratch[c], "%d %d %d %63s %d %d %d %d %d %d %d %d %d %d %d",
			       &major_nr, &minor_nr,
			       &stats.nr_blocks,
			       namebuf,
			       &stats.rd_ios,
			       &stats.rd_merges,
			       &stats.rd_sectors,
			       &stats.rd_ticks,
			       &stats.wr_ios,
			       &stats.wr_merges,
			       &stats.wr_sectors,
			       &stats.wr_ticks,
			       &stats.ios_in_flight,
			       &stats.io_ticks,
			       &stats.aveq);
		if (items == 15) {
			string name = namebuf;
			i = disks.find(name);
			if (i == disks.end()) {
				disk = new Disk (name, major_nr, minor_nr);
				disks[name] = disk;
			} else {
				disk = i->second;
				if (include_partitions || !disk->is_partition)
					Stats::difference(*disk, stats, 
							  disk->stats);
			}
		
			disk->stats = stats;
		}
		advance_nl(scratch, c);
	}
}

/*
 * Record the current time and update `delta`.
 *
 * The previous `newtime` is moved to `oldtime`, `newtime` is refreshed with
 * gettimeofday(), and `delta` receives the difference between the two in
 * milliseconds.  The program exits through check() if gettimeofday() fails.
 */
void read_delta(void)
{
	// The sample that was current until now becomes the baseline.
	oldtime = newtime;

	check ("gettimeofday", gettimeofday(&newtime, NULL));

	// Whole seconds first, then the (possibly negative) sub-second part.
	delta = (newtime.tv_sec - oldtime.tv_sec) * 1000;
	delta += (newtime.tv_usec - oldtime.tv_usec) / 1000;
}

/*
 * Write the command-line synopsis to stderr, with `progname` as the command
 * name, and exit with status 1.
 */
void usage()
{
	static const char synopsis[] =
		"Usage:\n"
		"%s [-pq] [t]\n"
		"  -p: display partition accounts\n"
		"  -q: quiet, suppress inactive devices\n"
		"   t: update interval (seconds)\n";

	fprintf(stderr, synopsis, progname);
	exit(1);
}

/*
 * Entry point: parse the options, open the partition file, take an initial
 * sample, then report once per interval until the process is killed.
 *
 *   -p   also report devices flagged as partitions
 *   -q   suppress devices that have shown no activity
 *   t    update interval in seconds; a positive integer, default 5
 *
 * An unknown option or an invalid interval prints the usage text and exits.
 */
int main(int argc, char *argv[])
{
	unsigned int interval = 5;
	int banner = 0;

	setlinebuf(stdout);
	
	progname = argv[0];
	
	// getopt() returns an int and signals the end of the options with -1;
	// storing the result in a char breaks that test where char is unsigned.
	int c;
	while ((c = getopt(argc, argv, "pq")) != -1) {
		switch (c) {
		case 'p':
			include_partitions = true;
			break;
		case 'q':
			quiet = true;
			break;
		default:
			usage();
		}
	}
	
	if (optind < argc) {
		// Reject anything that is not a positive whole number of seconds
		// representable as the unsigned int that sleep() takes.  A zero
		// interval would spin and produce zero-length samples.
		const char *arg = argv[optind++];
		char *end = NULL;

		errno = 0;
		const long seconds = strtol(arg, &end, 0);
		const unsigned int narrowed = static_cast<unsigned int>(seconds);
		if (errno || end == arg || *end != '\0' ||
		    seconds <= 0 || static_cast<long>(narrowed) != seconds)
			usage();
		interval = narrowed;
	}
		
	pagesize = getpagesize();
	scratch = new char[pagesize];
	
	partition_fd = open(partition_file, O_RDONLY, 0);
	check ("open part", partition_fd < 0);

	// The first sample only establishes the baseline; nothing is printed.
	read_delta();
	read_partition();

	while (1) {
		char timestring[80];
		time_t current_time;
		
		// Print the column header before the first report and again
		// after every 11 reports.
		if (!banner--) {
			banner = 10;
			printf ("\ndevice     %%util   qlen (ios:kB rd  ios:kB wr  ios:kB /sec) avwait (ms)\n");
		}

		sleep (interval);

		time(&current_time);
		const struct tm *now = localtime(&current_time);
		if (now) {
			strftime(timestring, sizeof timestring, "[%X]", now);
			puts(timestring);
		}
		
		read_delta();
		read_partition();
	}
}
