/* $Header$ */

/* knistnet.c - Linux implementation of "hitbox"-like functionality.
 * This code exists as a loadable kernel module.  It gains access to the
 * entry points it needs through some patches to the existing Linux kernel.
 * (Unfortunately, there seemed to be no practical alternative to patching
 * to gain access to the basic packet handling routine entry points.)
 * The user-level interface provided is a device driver one, modeled on the
 * original SunOS-based hitbox.
 *
 * Mark Carson, NIST/UMPC
 *	1/1997
 *
 * I modified this source code so that it would implement drop-tail queue management
 * discipline instead of DRD.  Basically, you can set the DRD max queue parameter in
 * the NIST Net interface which gets used as the drop-tail queue maximum length.  If
 * you find a bug in my code please fix it and let the NIST Net mailing list or me
 * know.
 *
 * Later,
 *  James Nichols, WPI-CS
 *  6/2003
 *  jnick@cs.wpi.edu
 */

#include "kincludes.h"

/* The following can only be included in one place! */
#define EXPORT_SYMTAB
#include <linux/module.h>

#include "tabledist.h"

/* Debug verbosity level; set through the HITIOCTL_DEBUG/NISTNET_DEBUG
 * ioctls.  Higher values enable more printk output in this file.
 */
int nistnet_debug;

/* BREAKPOINT(string): a hard "int $3" trap only when built into the kernel
 * with DEBUG defined; in every other configuration it just printk's the
 * string.  Breakpoints are only helpful when we're compiled into the kernel.
 */
#ifndef MODULE

#ifdef DEBUG
#define BREAKPOINT(string) asm("   int $3")
#define DEBUG_SPINLOCKS 2
#else
#define BREAKPOINT(string) printk(string)/*@@minidebug@@*/
#endif

#else /* MODULE */

#ifdef DEBUG
#define DEBUG_SPINLOCKS 2
#endif

#define BREAKPOINT(string) printk(string)/*@@minidebug@@*/

#endif /* MODULE */

/* The single spinlock taken by LinLock()/LinUnlock() below. */
spinlock_t LinLockVar = SPIN_LOCK_UNLOCKED;

/* Counts LinLock() invocations; used only to thin out the debug output. */
static int lock_ticker;

/* LinLock(string)/LinUnlock(string): take/release LinLockVar with
 * spin_lock_irqsave/spin_unlock_irqrestore.  Both macros reference a
 * variable named pre_flags, so the calling function must declare
 * "unsigned long pre_flags;" in scope.  The string is only a label for the
 * trace message, which is printed when nistnet_debug > 5, or when
 * nistnet_debug > 4 on every 64th lock (lock_ticker & 0x3f == 0).
 */
#define LinLock(string) \
	do {++lock_ticker;\
		if (nistnet_debug > 4 && (!(lock_ticker&0x3f) || nistnet_debug > 5)) \
			printk("lock %s", string); \
		spin_lock_irqsave(&LinLockVar, pre_flags);} while (0)
#define LinUnlock(string) \
	do {if (nistnet_debug > 4 && (!(lock_ticker&0x3f) || nistnet_debug > 5)) \
		printk(" unlock %s\n", string); \
	spin_unlock_irqrestore(&LinLockVar, pre_flags);} while (0)

/* Disabled (notdef) declaration of a hash table of lin_hitbox pointers;
 * not compiled.
 */
#ifdef notdef
#define HASHSIZE 256
static struct lin_hitbox *hittable[HASHSIZE];
#endif

/* DEBUG only: incremented by addnistnet(), decremented by rmnistnet(). */
#ifdef DEBUG
static int hittable_count;
#endif

/* Global emulator statistics and the emulator_on switch; copied out to
 * user space by the *GLOBALSTATS ioctls.
 */
struct nistnet_globalstats ourstats;

/* Phase codes accepted by global_stats() */
#define	STATS_START	0	/* record the start time */
#define	STATS_PROCESS	1	/* record elapsed time as processing overhead */
#define STATS_UNPROCESS	2	/* record elapsed time as "unprocess" overhead */

/* Forward declarations (fixed_gettimeofday is defined outside this file) */
void fixed_gettimeofday(struct timeval *tv);
void lin_hash_stats(int number);

/* Timer-slot pool management; defined near the end of this file */
void fast_fill(void);
void fast_empty(void);
struct fast_timer_list * fast_alloc(int how);
void fast_free(struct fast_timer_list *done);


/* addnistnet - validate an emulator entry and insert it into the table.
 *
 * Returns 0 on success, -EINVAL when the queue thresholds are inconsistent,
 * -ENOMEM when lt_add() reports failure.  The entry is modified in place
 * (old-style fields, packet timestamps, and drd_congestion without
 * CONFIG_ECN) before being handed to lt_add().
 */
int
addnistnet(NistnetTableEntryPtr entry)
{
	struct lin_hitreq *req = &entry->lteStats.hitreq;

	/* A non-zero minimum threshold must lie strictly below the maximum */
	if (req->drd_min && req->drd_min >= req->drd_max)
		return -EINVAL;
#ifdef CONFIG_ECN
	/* A non-zero congestion threshold must lie within [min, max] */
	if (req->drd_congestion &&
	    (req->drd_min > req->drd_congestion ||
	     req->drd_max < req->drd_congestion))
		return -EINVAL;
#else
	/* No ECN support compiled in: ignore whatever the caller passed */
	req->drd_congestion = 0;
#endif

	/* Mirror the current settings into the old-interface fields */
	entry->lteOldDelsigma = entry->lteDelsigma;
	entry->lteOldDelay = entry->lteDelay;
	entry->lteOldDup = entry->lteDup;
	entry->lteOldDrop = entry->lteDrop;

	/* Start both packet clocks at "now" */
	fixed_gettimeofday(&entry->lteStats.last_packet);
	entry->lteStats.next_packet = entry->lteStats.last_packet;

	/* Hand the entry to the table code */
	if (!lt_add(entry))
		return -ENOMEM;
#ifdef DEBUG
	++hittable_count;
#endif
	return 0;
}

/* addhitreq - old (hitbox-style) add: wrap a lin_hitreq in a zeroed
 * NistnetTableEntry and pass it to addnistnet().  Returns addnistnet()'s
 * result.
 */
int
addhitreq(struct lin_hitreq *hitreq)
{
	NistnetTableEntry wrapped;

	bzero(&wrapped, sizeof(wrapped));

	/* Key: source/destination pair */
	wrapped.lteSource = hitreq->src;
	wrapped.lteDest = hitreq->dest;

	/* Keep the full request, and lift out the new-style settings */
	wrapped.lteStats.hitreq = *hitreq;
	wrapped.lteDrop = hitreq->p_drop;
	wrapped.lteDup = hitreq->p_dup;
	wrapped.lteDelay = hitreq->delay;
	wrapped.lteDelsigma = hitreq->delsigma;

	return addnistnet(&wrapped);
}

/* rmnistnet - remove an entry from the emulator table.
 *
 * Maps lt_rm()'s result: 0 -> -ESRCH, -1 -> -EFAULT; any other value
 * (including 1) is treated as success and returns 0.
 */
int
rmnistnet(NistnetTableEntryPtr entry)
{
	int status = lt_rm(entry);

	if (status == 0)
		return -ESRCH;
	if (status == -1)
		return -EFAULT;

#ifdef DEBUG
	--hittable_count;
#endif
	return 0;
}

/* rmhitreq - old (hitbox-style) remove: build a zeroed table entry keyed
 * by the request's source/destination and pass it to rmnistnet().
 */
int
rmhitreq(struct lin_hitreq *hitreq)
{
	NistnetTableEntry wrapped;

	bzero(&wrapped, sizeof(wrapped));
	wrapped.lteStats.hitreq = *hitreq;
	wrapped.lteDest = hitreq->dest;
	wrapped.lteSource = hitreq->src;

	return rmnistnet(&wrapped);
}

/* gethitstats - old (hitbox-style) statistics lookup.
 *
 * On entry hitstats->hitreq.src/dest select the table entry.  On success
 * *hitstats is overwritten with the entry's statistics and
 * current_bandwidth is filled in as the sum of bandwidth_array divided by
 * seats_used (left as the plain sum if seats_used is 0).
 *
 * Returns 0 on success, -ESRCH if no matching entry exists.
 */
int
gethitstats(struct lin_hitstats *hitstats)
{
	NistnetTablePtr tableptr;
	int i;

	tableptr = lt_find_by_srcdest(hitstats->hitreq.src, hitstats->hitreq.dest);
	if (!tableptr)
		return -ESRCH;
	/* Put things where the old stuff expects it */
	tableptr->ltEntry.lteOldDrop = tableptr->ltEntry.lteDrop;
	tableptr->ltEntry.lteOldDup = tableptr->ltEntry.lteDup;
	*hitstats = tableptr->ltEntry.lteStats;
	/* Compute current_bandwidth.  Start from zero: the value copied out of
	 * the table is whatever the entry was added with, not a running total.
	 */
	hitstats->current_bandwidth = 0;
	for (i=0; i < BAND_ARRAY; ++i)
		hitstats->current_bandwidth +=
			hitstats->bandwidth_array[i];
	if (hitstats->seats_used)
		hitstats->current_bandwidth /= hitstats->seats_used;
	return 0;
}

/* getnistnet - new-style statistics lookup.
 *
 * On entry where->lteKey selects the table entry.  On success *where is
 * overwritten with a copy of the entry, the old-interface fields are
 * refreshed, and lteStats.current_bandwidth is filled in as the sum of
 * bandwidth_array divided by seats_used (left as the plain sum if
 * seats_used is 0).
 *
 * Returns 0 on success, -ESRCH if no matching entry exists.
 */
int
getnistnet(NistnetTableEntryPtr where)
{
	struct lin_hitstats *hitstats;
	NistnetTablePtr tableptr;
	int i;

	tableptr = lt_find_by_key(&where->lteKey, NULL);
	if (!tableptr)
		return -ESRCH;

	/* Copy things where the old stuff expects it */
	tableptr->ltEntry.lteOldDrop = tableptr->ltEntry.lteDrop;
	tableptr->ltEntry.lteOldDup = tableptr->ltEntry.lteDup;

	*where = tableptr->ltEntry;
	hitstats = &where->lteStats;
	where->lteOldDelay = where->lteDelay;
	where->lteOldDelsigma = where->lteDelsigma;
	/* Compute current_bandwidth.  Start from zero: the value copied out of
	 * the table is whatever the entry was added with, not a running total.
	 */
	hitstats->current_bandwidth = 0;
	for (i=0; i < BAND_ARRAY; ++i)
		hitstats->current_bandwidth +=
			hitstats->bandwidth_array[i];
	if (hitstats->seats_used)
		hitstats->current_bandwidth /= hitstats->seats_used;
	return 0;
}

/* Character device major number passed to register_chrdev() and
 * unregister_chrdev() below.  Nice and high, but it is tunable:
 * insmod hitmod.o major=your_selection
 * NOTE(review): no MODULE_PARM for "major" is visible in this file --
 * confirm the insmod override still works on the targeted kernels.
 */
static int major = HITMAJOR;

/*
 * The driver.
 */

/* hw_read - read() entry point: dump the emulator table to user space.
 * The device minor number selects the format: HITMINOR -> DumpPairs(),
 * NISTNETMINOR -> lt_read().  Any other minor reads as 0 bytes.
 */
static
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
ssize_t
hw_read(struct file * file, char * buf, size_t count, loff_t *whoknows)
#else
int
hw_read(struct inode * node, struct file * file, char * buf, int count)
#endif
{
	extern int DumpPairs(char *buf, int count);
	extern int lt_read(char *buf, int count);
	unsigned int which;

	/* Where the inode lives depends on the kernel version */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
	which = MINOR(file->f_dentry->d_inode->i_rdev);
#else
	which = MINOR(node->i_rdev);
#endif

	if (which == HITMINOR)
		return DumpPairs(buf, count);
	if (which == NISTNETMINOR)
		return lt_read(buf, count);
	return 0;
}

/* hw_write - write() entry point: load the delay distribution table.
 * The caller must supply at least tabledistsize() bytes; a shorter write
 * fails with -E2BIG.  On success returns tabledistsize(); if
 * tabledistfill() returns non-zero, that value is returned instead.
 */
static
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
ssize_t
hw_write(struct file * file, const char * buf, size_t count, loff_t *whoknows)
#else
int
hw_write(struct inode * node,struct file * file,const char * buf,int count)
#endif
{
	int status;

	if (count < tabledistsize())
		return -E2BIG;	/* ha, ha */

	status = tabledistfill(buf);
	if (status)
		return status;
	return tabledistsize();
}

/*
 * mucky muck ioctl interface
 *
 * hw_ioctl - ioctl() entry point carrying most of the emulator controls.
 * "type" is the command code and "arg" is a user-space pointer whose
 * pointee type depends on the command (see the casts in each case).
 *
 * Returns the result of the selected operation (0 on success), or -EFAULT
 * when a user-space copy fails; the copy_from_user_ret/copy_to_user_ret
 * calls are passed -EFAULT as their failure return value.  Command codes
 * not listed in the switch fall through and return 0.
 */
static int
hw_ioctl(struct inode * inode, struct file * file, unsigned int type, unsigned long arg)
{
	struct lin_hitreq hitreq;
	NistnetTableEntry nistnetreq;
	extern void kick_fast_rtc(void);
	int rc = 0;

	switch (type) {
	case HITIOCTL_OFF:
	case NISTNET_OFF:
		/* Shut down */
		ourstats.emulator_on=0;
		break;
	case HITIOCTL_ON:
	case NISTNET_ON:
		/* Turn on if not already on */
		ourstats.emulator_on=1;
		break;
	case HITIOCTL_ADD:
		/* Get what they want */
		copy_from_user_ret(&hitreq, (struct lin_hitreq *)arg,
			sizeof(struct lin_hitreq), -EFAULT);
		/* Add it to the table */
		rc = addhitreq(&hitreq);
		break;
	case NISTNET_ADD:
		/* Get what they want */
		copy_from_user_ret(&nistnetreq, (NistnetTableEntryPtr)arg,
			sizeof(NistnetTableEntry), -EFAULT);
		/* Add it to the table */
		rc  = addnistnet(&nistnetreq);
		break;
	case HITIOCTL_REMOVE:
		/* Get what they want */
		copy_from_user_ret(&hitreq, (struct lin_hitreq *)arg,
			sizeof(struct lin_hitreq), -EFAULT);
		/* Remove it from the table */
		rc = rmhitreq(&hitreq);
		break;
	case NISTNET_REMOVE:
		/* Get what they want */
		copy_from_user_ret(&nistnetreq, (NistnetTableEntryPtr)arg,
			sizeof(NistnetTableEntry), -EFAULT);
		/* Remove it from the table */
		rc = rmnistnet(&nistnetreq);
		break;
	case HITIOCTL_STATS:
		{
		struct lin_hitstats hitstats;

		/* Get what they want: the src/dest in hitstats.hitreq pick the entry */
		copy_from_user_ret(&hitstats, (struct lin_hitstats *)arg,
			sizeof(struct lin_hitstats), -EFAULT);
		rc = gethitstats(&hitstats);
		/* Copy out individual stats, only if the lookup succeeded */
		if (!rc) {
			copy_to_user_ret((struct lin_hitstats *)arg,
				&hitstats,
				sizeof(struct lin_hitstats), -EFAULT);
		}
		break;
		}
	case NISTNET_STATS:
		/* Get what they want: the key in the entry picks the table slot */
		copy_from_user_ret(&nistnetreq, (NistnetTableEntryPtr)arg,
			sizeof(NistnetTableEntry), -EFAULT);
		rc = getnistnet(&nistnetreq);
		/* Copy out individual stats, only if the lookup succeeded */
		if (!rc) {
			copy_to_user_ret((NistnetTableEntryPtr)arg,
				&nistnetreq,
				sizeof(NistnetTableEntry), -EFAULT);
		}
		break;
	case HITIOCTL_MODE:
		/* ?? -- accepted but does nothing */
		break;
	case HITIOCTL_TIMER:
		/* ?? -- accepted but does nothing */
		break;
	case HITIOCTL_MTU:
		/* ?? -- accepted but does nothing */
		break;
	case HITIOCTL_DEBUG:
	case NISTNET_DEBUG:
		/* Set the debug level from a long in user space */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
		rc = get_user(nistnet_debug, (long *)arg);
		if (rc < 0) break;
#else
		nistnet_debug = get_user((long *)arg);
#endif
#ifdef LT_DEBUG
		lt_set_debug_level(nistnet_debug);
#endif
		break;
	case HITIOCTL_GLOBALSTATS:
		/* Old interface: only the lin_globalstats part (ourstats.l) */
		copy_to_user_ret((struct lin_globalstats *)arg,
			&ourstats.l,
			sizeof(struct lin_globalstats), -EFAULT);
		break;
	case HITIOCTL_NGLOBALSTATS:
	case NISTNET_GLOBALSTATS:
		/* New interface: the whole nistnet_globalstats structure */
		copy_to_user_ret((struct nistnet_globalstats *)arg,
			&ourstats,
			sizeof(struct nistnet_globalstats), -EFAULT);
		break;
	case NISTNET_KICK:
		kick_fast_rtc();
		break;
	case NISTNET_FLUSH:
		flush_fast_timer_list();
		break;
	}
	return rc;
}


/*
 * hw_open - open() entry point.
 * Bumps the module use count (MOD_INC_USE_COUNT) so the driver memory is
 * not freed while the device is in use.  Always succeeds.
 */
static int
hw_open(struct inode *ino, struct file *filep)
{
	MOD_INC_USE_COUNT;

	return 0;
}

/*
 * hw_close - release() entry point.
 * Drops the use count taken in hw_open().  The return type is int (always
 * 0) on kernels >= 2.1.0 and void on older ones.
 */
static
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
int
#else
void
#endif
hw_close(struct inode *ino, struct file *filep)
{
	MOD_DEC_USE_COUNT;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
	return 0;
#endif
}

/* File operations table handed to register_chrdev().
 * Kernels newer than 2.3.25 use labelled ("field:") initializers; older
 * kernels use a positional initializer, so the order of the entries below
 * (and the version-dependent extra slots) must match that kernel's
 * struct file_operations layout.
 */
static struct file_operations hw_fops = {
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,3,25)
	owner:	THIS_MODULE,	/* struct module *owner */
	read:	hw_read,	/* read - get emulator table */
	write:	hw_write,	/* write - fill distribution table */
	ioctl:	hw_ioctl,	/* ioctl - most of the controls */
	open:	hw_open,	/* open */
	release:	hw_close,	/* release */

#else /* 2.0 and 2.2 versions */

	NULL,		/* lseek - n/a */
	hw_read,	/* read - get emulator table */
	hw_write,	/* write - fill distribution table */
	NULL,		/* readdir - n/a */
	NULL,		/* poll/select - n/a */
	hw_ioctl,	/* ioctl - most of the controls */
	NULL,		/* mmap - n/a, for now at least */
	hw_open,	/* open */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
	NULL,		/* flush */
#endif
	hw_close,	/* release */
	NULL,		/* fsync */
	NULL,		/* fasync */
	NULL,		/* check_media_change */
	NULL		/* revalidate */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
	,
	NULL		/* lock */
#endif

#endif /* 2.0 and 2.2 versions */

};


/* Various statistics.  We record some running totals in circular arrays,
 * to give an idea of how things are going.
 *
 * band_seat(time) maps a struct timeval (passed as an lvalue, not a
 * pointer) to a slot index in a BAND_ARRAY-sized circular array: its
 * tv_sec modulo BAND_ARRAY, i.e. one slot per second.
 */
#define band_seat(time) ((time.tv_sec)%BAND_ARRAY)


/* lin_hash_stats - record how well the hash table is doing.
 * Each call blends "number" 50/50 with the value already in the current
 * slot of ourstats.l.hash_tries, then moves on to the next slot of the
 * BAND_ARRAY-sized circular array.
 */
void
lin_hash_stats(int number)
{
	static int hash_slot;

	/* average 50/50 with the last value we got */
	ourstats.l.hash_tries[hash_slot] =
		(number + ourstats.l.hash_tries[hash_slot]) >> 1;

	/* step to the next slot, wrapping at the end of the array */
	if (++hash_slot >= BAND_ARRAY)
		hash_slot = 0;
}

/* global_stats - track how long our packet processing takes.
 *
 * process selects the phase:
 *   STATS_START     - remember the current time as the start of an interval
 *   STATS_PROCESS   - blend the time since the last STATS_START 50/50 into
 *                     the current slot of ourstats.l.process_overhead
 *   STATS_UNPROCESS - same, into ourstats.l.unprocess_overhead
 * Any other value is ignored.
 *
 * A negative elapsed time is bogus and is never recorded; a message is
 * printed about it only when nistnet_debug is set.
 */
void
global_stats(int process)
{
	static struct timeval start;
	struct timeval end;
	static int process_slot, unprocess_slot;
	long int usec_time;

	switch(process) {
	case STATS_START:
		fixed_gettimeofday(&start);
		return;
	case STATS_PROCESS:
		fixed_gettimeofday(&end);
		usec_time = timeval_diff(&end, &start);
		/* Check for bogus time values: reject them whether or not
		 * debugging is on, so they never pollute the averages.
		 */
		if (usec_time < 0) {
			if (nistnet_debug)
				printk("nistnet: pretty fast processing, %ld usecs!\n", usec_time);
			return;
		}
		/* Average in with previous value 50/50 */
		ourstats.l.process_overhead[process_slot] =
			(usec_time+ourstats.l.process_overhead[process_slot])>> 1;
		process_slot = (process_slot+1)%BAND_ARRAY;
		break;
	case STATS_UNPROCESS:
		fixed_gettimeofday(&end);
		usec_time = timeval_diff(&end, &start);
		/* Check for bogus time values (see above) */
		if (usec_time < 0) {
			if (nistnet_debug)
				printk("nistnet: pretty fast unprocessing, %ld usecs!\n", usec_time);
			return;
		}
		/* Average in with previous value 50/50 */
		ourstats.l.unprocess_overhead[unprocess_slot] =
			(usec_time+ourstats.l.unprocess_overhead[unprocess_slot])>> 1;
		unprocess_slot = (unprocess_slot+1)%BAND_ARRAY;
		break;
	default:
		break;
	}
}

/* What's the traffic like on this entry? */
void
packet_stats(struct sk_buff *skb, struct lin_hitstats *hitme)
{
	struct timeval our_time;
	long packettime;
	int last_seat, our_seat, seat;

	if (!skb->len)
		return;
	fixed_gettimeofday(&our_time);
	last_seat = band_seat(hitme->last_packet);
	our_seat = band_seat(our_time);
	packettime = timeval_diff(&our_time, &hitme->last_packet);
	/* Check for bogus time values */
	if (nistnet_debug && packettime < 0) {
			printk("nistnet: packet arrived in %ld usecs!\n", packettime);
			goto after_bandwidth;
		}
	hitme->last_packet = our_time;
	/* compute bandwidth */
	if (packettime > BAND_ARRAY*MILLION) {
		/* too long since last packet; zero out everything */
		memset(hitme->bandwidth_array, 0,
			BAND_ARRAY*sizeof(unsigned long));
		hitme->seats_used = 1;
	} else if (last_seat != our_seat) {	/* zero out skipped intervals */
		for (seat = (last_seat+1)%BAND_ARRAY; seat != our_seat;
			seat = (seat+1)%BAND_ARRAY) {
				hitme->bandwidth_array[seat] = 0;
		}
		/* plus get this one! */
		hitme->bandwidth_array[our_seat] = 0;
		if (hitme->seats_used < BAND_ARRAY)
			++hitme->seats_used;
	}
	hitme->bandwidth_array[our_seat] += skb->len;

after_bandwidth:
	hitme->current_size = skb->len;
	hitme->bytes_sent += skb->len;
	return;
}

#ifdef DEBUG
/* check_skb - DEBUG-only sanity check of an skb's buffer pointers.
 * Complains (tagged with "where") if data lies before head or tail lies
 * past end.  Only reports; it does not modify the skb.
 */
void
check_skb(struct sk_buff *skb, char *where)
{
	if (skb->data < skb->head)
		printk("bug:check_skb:under:%s\n", where);

	if (skb->tail > skb->end)
		printk("bug:check_skb:over:%s\n", where);
}
#endif /* DEBUG */

/* Receive packet interception */

/* The packet handler we displaced; set by grab_ip_rcv(), cleared by
 * release_ip_rcv().  Packets we do not hold back are passed to ippt->func.
 */
static struct packet_type *ippt;

/* Our own handler registration, filled in by grab_ip_rcv(). */
static struct packet_type ourpt;

/* We use an arbitrary spot in the skb control buffer to mark packets
 * which we've already processed.
 * we_saw_skb(skb) tests for the mark; gaze_at_skb(skb) sets it.
 * NOTE(review): assumes skb->cb has more than NISTNET_CB_MAGIC_SPOT bytes
 * and that nothing else uses that byte -- confirm per kernel version.
 */
#define NISTNET_CB_MAGIC	66
#define NISTNET_CB_MAGIC_SPOT	33
#define we_saw_skb(skb)   (skb->cb[NISTNET_CB_MAGIC_SPOT] == NISTNET_CB_MAGIC)
#define gaze_at_skb(skb)  (skb->cb[NISTNET_CB_MAGIC_SPOT] = NISTNET_CB_MAGIC)


/* Resume processing of a delayed packet */
void
runpacket(struct fast_timer_list *info)
{
	struct nistnet_packetinfo *hpi = (struct nistnet_packetinfo *)info->data;
	unsigned long pre_flags;

	LinLock("runpacket1");
	packet_stats(hpi->skb, &hpi->nte->lteStats);
	LinUnlock("runpacket1");
	/* Non-local save/restore of flags doesn't work on some
	 * architectures (notably Suns).  We should be in an OK
	 * situation without it, though...
	 */
	/*restore_flags(hpi->flags);*/
#ifdef DEBUG
	check_skb(hpi->skb, "third");
#endif
	/* Mark this one as already having been queued */
	gaze_at_skb(hpi->skb);
	(void) netif_rx(hpi->skb);
	LinLock("runpacket2");
	if (!hpi->nte->lteStats.qlen) {
		BREAKPOINT("zero queue in runpacket");
	} else {
		--hpi->nte->lteStats.qlen;
	}
	fast_free(info);
	LinUnlock("runpacket2");
	MOD_DEC_USE_COUNT;
}

/* This now does drop tail instead of doing DRD */
/* packet_drop - decide whether to drop a packet.
 *
 * If a maximum queue length (hitreq.drd_max) is set, *use_drd is set to 1
 * and the packet is dropped outright when the entry's queue length has
 * reached that maximum.  Otherwise -- and also when no maximum is set --
 * the packet is dropped according to the fixed random drop probability
 * (lteDrop, which may be 0, compared against a 16-bit correlated random
 * value).
 *
 * With CONFIG_ECN, *use_ecn is set to 1 when a maximum is set, the queue
 * is below it, and the queue length is <= hitreq.drd_congestion.
 *
 *  Modified by jnick@cs.wpi.edu to do drop tail --
 *
 * Return 1 if packet is to be dropped, 0 otherwise.
 */
int
packet_drop(NistnetTablePtr tableme, int *use_drd, int *use_ecn)
{
	struct lin_hitstats *stats = &tableme->ltEntry.lteStats;
	int roll;

	*use_drd = 0;
	*use_ecn = 0;

	if (stats->hitreq.drd_max) {	/* queue limit configured */
		/* The result is not used, but the call is kept so the
		 * random sequence stays as it always was.
		 */
		(void) myrandom();
		*use_drd = 1;

		/* Over max queue length -- drop it! */
		if (stats->qlen >= stats->hitreq.drd_max)
			return 1;
#ifdef CONFIG_ECN
		if (stats->qlen <= stats->hitreq.drd_congestion)
			*use_ecn = 1;
#endif /* CONFIG_ECN */
	}

	/* Do a random drop, if a drop probability is specified */
	if (!tableme->ltEntry.lteDrop)
		return 0;
	roll = (correlatedrandom(&tableme->ltEntry.lteIDrop)&0xffff);
	return roll < tableme->ltEntry.lteDrop;
}

#ifdef CONFIG_ECN
/* ecn_skb - mark a packet for explicit congestion notification, if supported.
 *
 * Returns -1 if the packet's IP header does not have ECN_CAPABLE set in
 * its TOS byte (nothing is changed), 0 otherwise.  When ECN_NOTED is not
 * already set it is OR'ed into the TOS byte and the header checksum is
 * patched in place rather than recomputed.
 */
int
ecn_skb(struct sk_buff *skb)
{
	struct iphdr *iph;

	/* Get the ip header (its location in the skb depends on kernel version) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
	iph = skb->nh.iph;
#else
	iph = skb->h.iph;
#endif
	/* Check if ecn enabled */
	if (!(iph->tos & ECN_CAPABLE))
		return -1;
	/* Munge bit, if not already munged */
	if (!(iph->tos & ECN_NOTED)) {
		unsigned long checksum;

		iph->tos |= ECN_NOTED;
		/* Adjust checksum to account for munged bit */
		/* ip checksum is 1's complement in network byte order... */
		checksum = iph->check - htons(ECN_NOTED);
		/* Fold the bits above the low 16 back in; the assignment below
		 * keeps only what fits in iph->check.
		 */
		checksum += checksum >>16;	/* catch carry */
		iph->check = checksum;
	}
	return 0;
}
#endif /* CONFIG_ECN */

/* Determine the amount of time to delay a packet.  This is the maximum
 * of two quantities:
 *	1. Probabilistic packet delay time
 *	2. Bandwidth-limitation delay time
 *
 * Question: Should we take probabilistic delay into account in determining
 * bandwidth consumption?  Answer: This complicates things a little too much.
 * Our model is that bandwidth throttling happens first at one virtual
 * choke point, then packets may get independently delayed at some later
 * point.  This can result in packets getting bunched up, so the stated
 * bandwidth limitation is actually exceeded at some point.
 *
 * The problem with taking the delay into account is that the simplest
 * way of doing so would remove any possibility of reordering packets --
 * each packet could not be sent any sooner than its predecessor.  It
 * thus seems more useful in terms of network effects to do things the
 * way they are here.
 *
 * The delay is computed in microseconds and returned converted by
 * usec_to_minijiffy().  As a side effect, when a bandwidth limit is set,
 * the entry's next_packet time is advanced by this packet's transmission
 * time.
 */
int
packet_delay(struct sk_buff *skb, NistnetTablePtr tableme)
{
	int probdelay=0, bandwidthdelay=0, delay=0;
	struct lin_hitstats *hitme;
	struct timeval our_time={0,0};
	long packettime=0;

	hitme = &tableme->ltEntry.lteStats;

	/* Figure probabilistic delay */
	probdelay = correlatedtabledist(&tableme->ltEntry.lteIDelay);

	/* Figure bandwidth-limitation delay */
	if (hitme->hitreq.bandwidth) {

		fixed_gettimeofday(&our_time);
		/* We can't send until queued packets have been taken care of */
		bandwidthdelay = timeval_diff(&hitme->next_packet,
					&our_time);
		if (bandwidthdelay < 0) {
			/* The virtual link is idle: this packet can start now */
			bandwidthdelay = 0;
			hitme->next_packet = our_time;
		}
		/* Now determine how much time this packet will take in
		 * usec, in order to schedule the following one.
		 */
		/* skb->len can sometimes be too big (with other junk)(?) */
		/* len*MILLION/bandwidth, split into quotient and remainder
		 * parts, with the remainder part rounded to nearest.
		 */
		packettime = (long)skb->len*(MILLION/hitme->hitreq.bandwidth) +
				((long)skb->len*(MILLION%hitme->hitreq.bandwidth)
					+ hitme->hitreq.bandwidth/2)/hitme->hitreq.bandwidth;
		/* Quick defensive hack: even at 1 byte/second, a packet
		 * shouldn't take longer than MTU seconds!
		 */
		if (packettime < 0 || packettime > 1500*MILLION) {
			if (nistnet_debug)
				printk("nistnet: wacky packettime of %ld, with length %ld and bandwidth %d\n",
					(long)packettime, (long)skb->len, hitme->hitreq.bandwidth);
			packettime = 0;
		} else {
			timeval_add(&hitme->next_packet, packettime);
		}
		/* Build-time choice of how much of this packet's own
		 * transmission time is added to its delay: half, all, or none.
		 */
#if defined(CONFIG_DELAYMIDDLE)
		bandwidthdelay += packettime/2;
#elif defined(CONFIG_DELAYEND)
		bandwidthdelay += packettime;
#elif defined(CONFIG_DELAYSTART)
#endif

	}

	/* The larger of the two delays wins */
	delay = probdelay > bandwidthdelay ? probdelay : bandwidthdelay;

	if (nistnet_debug > 4) {	/* Print what we're doing every once in a while */
		static int ticker;

		/* every 64th call */
		if (!(ticker&0x3f)) {
			printk("nistnet: packet size %ld packettime %ld usec delay %ld usec\n",
				(long)skb->len, (long) packettime, (long)delay);
			if (bandwidthdelay)
				printk("nistnet: current time is %d.%06d, will send at %d.%06d\n",
					(int)our_time.tv_sec, (int)our_time.tv_usec,
					(int)hitme->next_packet.tv_sec, (int)hitme->next_packet.tv_usec);
		}
		++ticker;
	}

	return usec_to_minijiffy(delay);
}

/* packet_dup - decide whether to duplicate a packet.
 * Fixed probability: returns 1 when a 16-bit correlated random value falls
 * below the entry's lteDup setting, 0 otherwise (always 0 when lteDup is 0,
 * in which case no random number is drawn).
 */
int
packet_dup(NistnetTablePtr tableme)
{
	int roll;

	if (tableme->ltEntry.lteDup) {
		roll = (correlatedrandom(&tableme->ltEntry.lteIDup)&0xffff);
		return roll < tableme->ltEntry.lteDup;
	}
	return 0;
}

/* default_munger - placeholder old-style munger used when no add-on is
 * registered.  Ignores its arguments and returns 1; rcv_packet_munge()
 * continues normal processing for any return value > 0.
 */
int
default_munger(struct sk_buff *skb, struct net_device *dev,
	       struct packet_type *ptype, struct lin_hitbox *hitme)
{
	return 1;
}

/* DefaultNistnetMunger - placeholder new-style munger used when no add-on
 * is registered.  Ignores its arguments and returns 1; rcv_packet_munge()
 * continues normal processing for any return value > 0.
 */
int
DefaultNistnetMunger(struct sk_buff *skb, struct net_device *dev,
		     struct packet_type *ptype, NistnetTableEntry *hitme)
{
	return 1;
}

/* Currently installed add-on mungers.  rcv_packet_munge() calls
 * other_munger when it differs from default_munger, and
 * OtherNistnetMunger otherwise.
 */
packet_munger other_munger = default_munger;
NistnetMunger OtherNistnetMunger = DefaultNistnetMunger;

/* addmunge - install an old-style munger, replacing whatever old-style
 * munger was installed before.  Exported for add-on modules.
 */
void
addmunge(packet_munger munger)
{
	other_munger = munger;
}

/* AddNistnetMunger - install a new-style munger.  Exported for add-on
 * modules.  New mungers take precedence: any old-style munger is
 * uninstalled at the same time, since rcv_packet_munge() only calls the
 * new-style hook when the old-style one is the default.
 */
void
AddNistnetMunger(NistnetMunger munger)
{
	OtherNistnetMunger = munger;
	other_munger = default_munger;
}

/* rmmunge - uninstall an old-style munger.  Does nothing unless "munger"
 * is the one currently installed.  Exported for add-on modules.
 */
void
rmmunge(packet_munger munger)
{
	if (other_munger != munger)
		return;
	other_munger = default_munger;
}

/* RmNistnetMunger - uninstall a new-style munger.  Does nothing unless
 * "munger" is the one currently installed.  Exported for add-on modules.
 */
void
RmNistnetMunger(NistnetMunger munger)
{
	if (OtherNistnetMunger != munger)
		return;
	OtherNistnetMunger = DefaultNistnetMunger;
}

/* munge_finish(string) - common exit sequence for rcv_packet_munge():
 * release the lock (string labels the debug trace) and then, if a
 * duplicate skb2 was made, run it through rcv_packet_munge() as well.
 * Relies on the caller's locals skb2, dev, ptype and pre_flags.
 */
#define munge_finish(string)	{LinUnlock(string); if (skb2) (void) rcv_packet_munge(skb2, dev, ptype);/* recursively process dup */ }

/* rcv_packet_munge - our replacement IP receive handler.
 *
 * When the emulator is off, or the packet carries our "already seen" mark
 * (it was delayed earlier and re-injected by runpacket()), the packet goes
 * straight to the original handler ippt->func.  Otherwise:
 *   1. look up the matching table entry and give any add-on munger a
 *      look; a munger result <= 0 ends processing and is returned as is;
 *   2. packets without a table entry go straight to the original handler;
 *   3. decide whether to drop the packet (or ECN-mark it instead, with
 *      CONFIG_ECN);
 *   4. possibly duplicate it; the copy is processed recursively on exit;
 *   5. compute the delay: with no delay the packet goes to the original
 *      handler now, otherwise it is parked on a fast timer that calls
 *      runpacket() later.
 *
 * Returns the original handler's result when the packet is passed on,
 * 0 when it was dropped or queued, or the munger's result when <= 0.
 */
int
rcv_packet_munge(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype)
{
	unsigned long pre_flags;	/* used by LinLock/LinUnlock */

	LinLock("rcv_packet1");
	if (ourstats.emulator_on && !we_saw_skb(skb)) {
		int use_drd, use_ecn, delaytime;
		NistnetTablePtr tableme;
		struct lin_hitstats *hitme;
		struct lin_hitbox dummy;
		struct fast_timer_list *screamer;
		struct nistnet_packetinfo *hpi;
		struct sk_buff *skb2=NULL;
		int ret=1;

		global_stats(STATS_START);
		tableme = lt_find_by_ipheader(skb);
		/*tableme = lt_find_by_srcdest(skb->h.iph->saddr, skb->h.iph->daddr);*/
		if (tableme) {
			hitme = &tableme->ltEntry.lteStats;
			if (other_munger != default_munger) {
				/* Old-style mungers get a copy of the stats */
				dummy.stats = *hitme;
				dummy.next = NULL;
				ret = (*other_munger)(skb, dev, ippt, &dummy);
			} else {
				ret = (*OtherNistnetMunger)(skb, dev, ippt, &tableme->ltEntry);
			}
		} else {
			hitme = NULL;
			if (other_munger != default_munger) {
				ret = (*other_munger)(skb, dev, ippt, NULL);
			} else {
				ret = (*OtherNistnetMunger)(skb, dev, ippt, NULL);
			}
		}
		if (ret <= 0) {
			/* The munger told us to stop here */
			global_stats(STATS_PROCESS); /* well, sort of */
			LinUnlock("global_stats1");
			return ret;
		}
		if (!tableme) {	/* not intercepting */
			global_stats(STATS_UNPROCESS);
			LinUnlock("global_stats2");
			return ippt->func(skb, dev, ippt);
		}

		/* Assume we will queue until we find otherwise */
		++hitme->qlen;

		/* See if we're going to drop the packet */
		if (packet_drop(tableme, &use_drd, &use_ecn)) {
#ifdef CONFIG_ECN
			/* ecn behavior: mark packet, don't drop */
			if (use_ecn && ecn_skb(skb) == 0) {
				++hitme->drd_ecns;
			} else {
#endif /* CONFIG_ECN */
				our_kfree_skb(skb, FREE_WRITE);
				--hitme->qlen;
				++hitme->n_drops;
				if (use_drd)
					++hitme->drd_drops;
				else
					++hitme->rand_drops;
				global_stats(STATS_PROCESS);
				LinUnlock("global_stats3");
				return 0;
#ifdef CONFIG_ECN
			}
#endif /* CONFIG_ECN */
		}

		/* See if we're going to duplicate the packet.  Here,
		 * we just do fixed probability.
		 */

		if (packet_dup(tableme)) { /* you get a new sister! */
			++hitme->dups;
			/* May be NULL on allocation failure; munge_finish copes */
			skb2 = skb_copy(skb, GFP_ATOMIC);
		}

		/* Now see if we're going to delay the packet */
		if (!(delaytime = packet_delay(skb, tableme))) { /* no delay */
			--hitme->qlen;
			packet_stats(skb, hitme);

			global_stats(STATS_PROCESS);
			munge_finish("no delay");
			return ippt->func(skb, dev, ippt);
		}

		screamer = fast_alloc(GFP_ATOMIC);
		/* If we can't allocate, punt!  (The packet is passed on undelayed.) */
		if (!screamer) {
			--hitme->qlen;
			++hitme->mem_drops;
			packet_stats(skb, hitme);
			global_stats(STATS_PROCESS);
			munge_finish("fast_alloc failed");
			return ippt->func(skb, dev, ippt);
		}
		hpi = (struct nistnet_packetinfo *)screamer->data;

		init_fast_timer(screamer);
#ifdef DEBUG
		check_skb(skb, "first");
#endif
		hpi->skb = skb_unshare(skb, GFP_ATOMIC);
		if (!hpi->skb) {
			/* skb_unshare() could not get a private copy and has
			 * already released the original, so the packet is
			 * gone.  Give the timer slot back and account for the
			 * loss instead of queueing a NULL skb for runpacket().
			 */
			fast_free(screamer);
			--hitme->qlen;
			++hitme->mem_drops;
			global_stats(STATS_PROCESS);
			munge_finish("skb_unshare failed");
			return 0;
		}
#ifdef DEBUG
		check_skb(hpi->skb, "second");
#endif
		hpi->dev = dev;
		hpi->nte = &tableme->ltEntry;
		/* We don't actually use this anymore, as non-local
		 * save/restore of flags turns out not to work on
		 * some architectures (notably Suns).  But we'll
		 * leave it in to indicate what we were thinking
		 * about...
		 */
		hpi->flags = pre_flags;

		/* Schedule something to happen in a little while */
		screamer->expires = delaytime;
		/* Pin the module while a packet is parked; runpacket()
		 * drops this reference.
		 */
		MOD_INC_USE_COUNT;
		add_fast_timer(screamer);
		global_stats(STATS_PROCESS);
		munge_finish("reg delay");
		return 0;
	} else {
		LinUnlock("ippt->func");
		return ippt->func(skb, dev, ippt);
	}
}

/* grab_ip_rcv - interpose rcv_packet_munge() in front of the kernel's IP
 * receive handler.
 *
 * Registers our own packet_type for ETH_P_IP, then walks the list our
 * entry was linked into to find the handler that was already there,
 * remembers it in ippt, and unregisters it so that all IP packets come to
 * us first.
 *
 * If no existing handler is found, our own handler is removed again and
 * ippt stays NULL: rcv_packet_munge() passes packets on through ippt->func,
 * so leaving it installed without an original handler would crash on the
 * first IP packet.
 */
void
grab_ip_rcv(void)
{
	struct packet_type *us;

	ourpt.type = htons(ETH_P_IP);	/* IP packets (only) */
	ourpt.dev = NULL;		/* wild card, for any dev */
	ourpt.func = rcv_packet_munge;	/* our handler */
	ourpt.data = NULL;		/* nothing we need to keep */
	ourpt.next = NULL;		/* filled out by dev_add_pack */
	/* Add our handler */
	dev_add_pack(&ourpt);
	/* Now we search for the old one.  Yes, this is a dirty trick.
	 * We are using our proffered handler as a Trojan horse to get
	 * at the old one.  Heh, heh, heh.
	 */
	for (us = ourpt.next; us; us = us->next) {
		if (us->type == ourpt.type) {	/* Got 'em! */
			printk("grab_ip_rcv: Found ippt at %lx\n",
				(unsigned long int) us);
			ippt = us;
			dev_remove_pack(us);
			return;
		}
	}

	/* Nothing to forward packets to: back out rather than intercept */
	printk("grab_ip_rcv: original IP handler not found, not intercepting\n");
	dev_remove_pack(&ourpt);
}

/* release_ip_rcv - undo grab_ip_rcv(): take our handler out and put the
 * original one back.  Does nothing if no original handler was captured.
 */
void
release_ip_rcv(void)
{
	if (!ippt)
		return;

	dev_remove_pack(&ourpt);
	dev_add_pack(ippt);
	ippt = NULL;
}

#ifdef MODULE

/* On kernels after 2.1.0 and before 2.3.0, irq_desc_addr (defined outside
 * this file) is exposed as an integer module parameter.
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0) && LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)
extern int irq_desc_addr;
MODULE_PARM(irq_desc_addr, "i");
#endif

/* I don't know exactly when these various modules macros were defined;
 * the following is a rough cut...
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0)
MODULE_AUTHOR("Mark Carson <carson@antd.nist.gov>");
MODULE_DESCRIPTION("NIST Net network emulator");
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,10)
/* See the README.License file for why this "license" is included */
MODULE_LICENSE("GPL and additional rights");
#endif
#endif

/* Module/driver initialisation; named init_module() when built as a
 * module and nistnet_init() otherwise.
 *
 * Registers the character device, then brings up the fast timer, the
 * timer-slot pool and the emulator table, clears the global statistics
 * and hooks the IP receive path.  Returns -EIO if the character device
 * cannot be registered, 0 otherwise.
 */
int
init_module(void)
#else
int
nistnet_init(void)
#endif
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0)
	void export_nistnet_symbols(void);
#endif

	if (register_chrdev(major, "hw", &hw_fops)) {
		printk("nistnet: register_chrdev failed: goodbye world :-(\n");
		return -EIO;
	}

	/* Registration worked: announce ourselves */
	printk("nistnet: Hello, world!\n");
	printk("nistnet:  I am a modified version of NIST Net.\n");
	printk("nistnet:  jnick@cs.wpi.edu modified my source code\n");
	printk("nistnet:  so that I would do droptail instead of the default\n");
	printk("nistnet:  DRD behavior.  To specify on the command line max_q:\n");
	printk("nistnet:    --drd 0 max_q\n");

	(void) install_fast_timer();
	fast_fill();
	lt_init();
	memset(&ourstats, 0, sizeof(ourstats));
	grab_ip_rcv();
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0)
	export_nistnet_symbols();
#endif
	return 0;
}

#ifdef MODULE
/* cleanup_module - module unload: unregister the character device, turn
 * the emulator off, restore the original IP handler, and tear down the
 * table, the timer-slot pool and the fast timer.  Failures are only
 * reported with printk; the teardown carries on regardless.
 */
void
cleanup_module(void)
{
	int failed;

	failed = (unregister_chrdev(major, "hw") != 0);
	if (failed)
		printk("nistnet: cleanup_module failed\n");

	ourstats.emulator_on = 0;
	release_ip_rcv();
	lt_cleanup();
	fast_empty();

	failed = (uninstall_fast_timer() != 0);
	if (failed) {
		/* Well, we're in trouble now! */
		printk("nistnet: uninstall_fast_timer failed\n");
	}
}
#endif

/* We allocate 1024 slots at startup, then allow for extra bunches
 * of 64 at a time to be allocated if needed.  (We keep the extra
 * allocations small, since they are done at interrupt time, from
 * presumably precious locked-down kernel buffers.)
 *
 * Hence, the initial memory requirement is around 36K, while the
 * maximum allowed usage is on the order of 616K (17344 packets),
 * not counting the space used up by all those extra sk_buffs hanging
 * around.
 *
 * If you really are planning to delay enormous numbers of packets,
 * you'd be better off making FAST_RESERVE larger, more or less
 * equal to the maximum number of packets you anticipate delaying.
 */
#define FAST_RESERVE 	1024	/* slots in the initial allocation */
#define FAST_EMERGENCY	64	/* slots in each later allocation */
#define FAST_MAX	256	/* maximum number of allocations, initial one included */

/* One pointer per allocation made by fast_fill(), kept so fast_empty() can
 * release them; fast_stack is the head of the free-slot list.
 */
struct fast_timer_list *bigfastspace[FAST_MAX], *fast_stack;
struct nistnet_packetinfo *bighpispace[FAST_MAX];
/* Number of allocations made so far (0 means nothing is allocated yet) */
int extra_count = 0;

/* fast_fill - grow the pool of timer slots.
 *
 * Each slot is a fast_timer_list paired with a nistnet_packetinfo (reached
 * through the timer's data field) and preset to call runpacket().  The
 * first call allocates FAST_RESERVE slots with vmalloc; later calls
 * allocate FAST_EMERGENCY slots with kmalloc(GFP_ATOMIC), up to FAST_MAX
 * allocations in total.  On any allocation failure, or once the limit is
 * reached, the function returns without changing the pool.
 *
 * NOTE(review): "first call" is detected by extra_count == 0, so if the
 * initial allocation fails, a later call from fast_alloc() would also take
 * the vmalloc path -- confirm that is acceptable in that calling context.
 */
void
fast_fill(void)
{
	struct fast_timer_list *newfast, *fastspace;
	struct nistnet_packetinfo *newhpi, *hpispace;
	int i, limit;

	if (!extra_count) {	/* First time, allocate a few pages */
		limit = FAST_RESERVE;
		fastspace = (struct fast_timer_list *)
			vmalloc(sizeof(struct fast_timer_list)*limit);
		if (!fastspace)
			return;
		hpispace = (struct nistnet_packetinfo *)
			vmalloc(sizeof(struct nistnet_packetinfo)*limit);
		if (!hpispace) {
			/* Don't keep half a pair */
			vfree(fastspace);
			return;
		}
	} else if (extra_count < FAST_MAX) {	/* subsequent times, go for fairly small chunks */
		limit = FAST_EMERGENCY;
		fastspace = (struct fast_timer_list *)
			kmalloc(sizeof(struct fast_timer_list)*limit, GFP_ATOMIC);
		if (!fastspace)
			return;
		hpispace = (struct nistnet_packetinfo *)
			kmalloc(sizeof(struct nistnet_packetinfo)*limit, GFP_ATOMIC);
		if (!hpispace) {
			/* Don't keep half a pair */
			our_kfree_s(fastspace, sizeof(struct fast_timer_list)*limit);
			return;
		}
	} else {			/* somebody got too greedy */
		return;
	}
	/* Remember both allocations so fast_empty() can free them.  Index 0
	 * is always the vmalloc'ed pair; the rest are kmalloc'ed.
	 */
	bigfastspace[extra_count] = fastspace;
	bighpispace[extra_count] = hpispace;
	++extra_count;

	/* Wire up each new slot and push it on the free list */
	for (i=0; i < limit; ++i) {
		newfast = fastspace+i;
		newhpi = hpispace+i;
		newfast->data = (unsigned long) newhpi;
		newfast->function = runpacket;
		newfast->next = fast_stack;
		fast_stack = newfast;
	}
}

/* fast_empty - release every allocation made by fast_fill() and reset the
 * pool to its empty state.  Index 0 holds the vmalloc'ed initial pair;
 * indices 1..extra_count-1 hold the kmalloc'ed FAST_EMERGENCY-sized pairs.
 */
void
fast_empty(void)
{
	int slot;

	/* The initial reserve */
	vfree((void *)bigfastspace[0]);
	vfree((void *)bighpispace[0]);

	/* The emergency chunks */
	slot = 1;
	while (slot < extra_count) {
		our_kfree_s(bigfastspace[slot], sizeof(struct fast_timer_list)*FAST_EMERGENCY);
		our_kfree_s(bighpispace[slot], sizeof(struct nistnet_packetinfo)*FAST_EMERGENCY);
		++slot;
	}

	/* Forget everything we had */
	extra_count = 0;
	fast_stack = NULL;
	memset((void *)bigfastspace, 0, sizeof(bigfastspace));
	memset((void *)bighpispace, 0, sizeof(bighpispace));
}

/* fast_alloc - take a timer slot from the free list.
 * If the list is empty, fast_fill() is asked to grow the pool first.
 * Returns NULL if there is still nothing available.  The "how" argument
 * is not used.
 */
struct fast_timer_list *
fast_alloc(int how)
{
	struct fast_timer_list *slot;

	if (!fast_stack)
		fast_fill();	/* try to get some more */

	slot = fast_stack;
	if (slot)
		fast_stack = slot->next;
	return slot;
}

/* fast_free - give a timer slot back by pushing it onto the head of the
 * free list.
 */
void
fast_free(struct fast_timer_list *done)
{
	struct fast_timer_list *old_head = fast_stack;

	done->next = old_head;
	fast_stack = done;
}

/* Export interfaces */
/* The four munger registration functions are made available to other
 * modules.  Kernels >= 2.1.0 use EXPORT_SYMBOL_NOVERS; older kernels build
 * a symbol table and register it from export_nistnet_symbols(), which
 * the initialisation routine calls on those kernels.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
EXPORT_SYMBOL_NOVERS(addmunge);
EXPORT_SYMBOL_NOVERS(rmmunge);
EXPORT_SYMBOL_NOVERS(AddNistnetMunger);
EXPORT_SYMBOL_NOVERS(RmNistnetMunger);
#else
static struct symbol_table nistnet_syms = {
#include <linux/symtab_begin.h>

	X(addmunge),
	X(rmmunge),
	X(AddNistnetMunger),
	X(RmNistnetMunger),

#include <linux/symtab_end.h>
};

/* Register the table above with the kernel (pre-2.1.0 only) */
void export_nistnet_symbols(void)
{
	register_symtab(&nistnet_syms);
}
#endif
